Token exhilar exceeds length of provided text sized 3801

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

Token exhilar exceeds length of provided text sized 3801

OldSkoolMark
Having some trouble clustering my data ... These symptoms are similar to some problems that were fixed last year. Possible regression? Suggestions on how to proceed? Thanks in advance!

https://issues.apache.org/jira/browse/SOLR-1883
https://issues.apache.org/jira/browse/SOLR-1404

Nov 7, 2011 8:15:35 AM org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine cluster
SEVERE: Carrot2 clustering failed
org.apache.solr.common.SolrException: org.apache.lucene.search.highlight.InvalidTokenOffsetsException: Token exhilar exceeds length of provided text sized 3801
        at org.apache.solr.highlight.DefaultSolrHighlighter.doHighlightingByHighlighter(DefaultSolrHighlighter.java:475)
        at org.apache.solr.highlight.DefaultSolrHighlighter.doHighlighting(DefaultSolrHighlighter.java:379)
        at org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine.getDocuments(CarrotClusteringEngine.java:303)
        at org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine.cluster(CarrotClusteringEngine.java:124)
        at org.apache.solr.handler.clustering.ClusteringComponent.process(ClusteringComponent.java:91)
        at org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:194)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129)
        at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:241)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1368)
        at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:356)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:252)
        at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1212)
        at org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:399)
        at org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216)
        at org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
        at org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:766)
        at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:450)
        at org.mortbay.jetty.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:230)
        at org.mortbay.jetty.handler.HandlerCollection.handle(HandlerCollection.java:114)
        at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
        at org.mortbay.jetty.Server.handle(Server.java:326)
        at org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
        at org.mortbay.jetty.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:928)
        at org.mortbay.jetty.HttpParser.parseNext(HttpParser.java:549)
        at org.mortbay.jetty.HttpParser.parseAvailable(HttpParser.java:212)
        at org.mortbay.jetty.HttpConnection.handle(HttpConnection.java:404)
        at org.mortbay.jetty.bio.SocketConnector$Connection.run(SocketConnector.java:228)
        at org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:582)
Caused by: org.apache.lucene.search.highlight.InvalidTokenOffsetsException: Token exhilar exceeds length of provided text sized 3801
        at org.apache.lucene.search.highlight.Highlighter.getBestTextFragments(Highlighter.java:233)
        at org.apache.solr.highlight.DefaultSolrHighlighter.doHighlightingByHighlighter(DefaultSolrHighlighter.java:468)
        ... 27 more

Here is the relevant portion of my solrconfig.

  <!--
    Search handler exposed at /clustering. It is loaded lazily, and is only
    enabled when the solr.clustering.enabled property resolves to true
    (the value after the colon, false, is the fallback).
  -->
  <requestHandler name="/clustering"
                  startup="lazy"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
    <lst name="defaults">
      <!-- Clustering is on by default, uses the engine named "default",
           and is applied to the search results. -->
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
     
      <!-- Document fields handed to Carrot2 as title and URL. -->
      <str name="carrot.title">title</str>
      <str name="carrot.url">url</str>
     
       <!-- Document field handed to Carrot2 as the snippet text. -->
       <str name="carrot.snippet">description</str>
       
       <!-- NOTE(review): the stack trace in this post reaches
            DefaultSolrHighlighter.doHighlighting from
            CarrotClusteringEngine.getDocuments; presumably this flag is what
            routes the snippet field through the highlighter. Confirm against
            the clustering component documentation. -->
       <bool name="carrot.produceSummary">true</bool>
       
       
       
       <bool name="carrot.outputSubClusters">false</bool>
       
       <!-- Query defaults: edismax parser, boosted query fields, a match-all
            fallback query, 10 rows, and every stored field plus the score. -->
       <str name="defType">edismax</str>
       <!-- NOTE(review): text, features, name, sku, manu and cat are not among
            the columns mapped by the data-config.xml shown in this post
            (user_id, description, title, url). Verify they exist in the
            schema and are the fields that should be searched. -->
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
    </lst>     
    <!-- Adds the component named "clustering" to the end of this handler's
         component list. -->
    <arr name="last-components">
      <str>clustering</str>
    </arr>
  </requestHandler>

Also, here is my data-config.xml, as my data is in an sqlite3 DB.

<!--
  DataImportHandler configuration.
  Reads rows from a SQLite database through JDBC and builds one Solr document
  per user_activity row with content_type_id = 6, enriched with the matching
  wa_feed_items row (description, title, url).
-->
<dataConfig>
  <dataSource type="JdbcDataSource"
              driver="org.sqlite.JDBC"
              name="user-ds"
              url="jdbc:sqlite://home/wowmobile/public_fm/public/sisa/public/fmedia/db/smarthomegroupservices.db"
              user=""
              password=""/>
  <document>
    <!-- Outer entity: one row per user activity of content type 6. -->
    <entity name="useractivity"
            dataSource="user-ds"
            query="SELECT id,user_id,content_type_id,content_id FROM user_activity WHERE content_type_id=6">
      <field column="user_id" name="user_id" />
      <!--
        Nested entity: the feed item referenced by the outer row's content_id.
        HTMLStripTransformer is declared here, on the entity that actually
        produces the description column, and the field opts in with
        stripHTML="true". A transformer only sees rows of the entity it is
        declared on and only touches fields flagged with stripHTML, so the
        previous placement on the outer entity with no flagged field stripped
        nothing.
        NOTE(review): whether leftover markup is what triggers the reported
        InvalidTokenOffsetsException is not established; re-import and re-test.
      -->
      <entity name="webcontent"
              transformer="HTMLStripTransformer"
              query="SELECT description,title,url FROM wa_feed_items WHERE id IS NOT NULL AND id=${useractivity.content_id}">
        <field column="description" name="description" stripHTML="true" />
        <field column="title" name="title" />
        <field column="url" name="url" />
      </entity>
    </entity>
  </document>
</dataConfig>

schema.xml has the standard description and title fields.
  <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
   <field name="description" type="text_general" indexed="true" stored="true"/>