2012-04-17 66 views
4

Solr 自定义分词器只在第一次查询时被调用

我创建了一个自定义分词器(tokenizer),通过 admin/analysis.jsp 和 System.out 日志检查,它似乎工作正常。但是,当我对使用该自定义分词器的字段执行查询时,我发现(通过 System.out 日志检查)自定义分词器只在第一个查询字符串时被调用。你能帮我指出我哪里做错了吗?这是我的代码:

package com.fosp.searchengine; 
import java.io.Reader; 
import org.apache.lucene.analysis.WhitespaceTokenizer; 
import org.apache.solr.analysis.WhitespaceTokenizerFactory; 

/**
 * Tokenizer factory that pre-processes the incoming text with
 * {@link ProcessedStringReader} before handing it to a
 * {@link WhitespaceTokenizer}.
 *
 * <p>NOTE(review): the answer accompanying this code states that the object
 * produced by the factory is reused, so {@code create} may not be invoked for
 * every query string. Confirm against the Lucene/Solr version in use.
 */
public class JvnTextProTokenizerFactory extends WhitespaceTokenizerFactory {

    /**
     * Builds a whitespace tokenizer over the processed form of {@code input}.
     *
     * @param input the raw character stream to analyse
     * @return a whitespace tokenizer reading from a {@link ProcessedStringReader}
     */
    @Override
    public WhitespaceTokenizer create(Reader input) {
        // Trace output used to observe how often the factory is invoked.
        System.out.println("WhitespaceTokenizer create(Reader input)");
        return new WhitespaceTokenizer(new ProcessedStringReader(input));
    }
}


package com.fosp.searchengine; 
import java.io.IOException; 
import java.io.Reader; 

/**
 * A {@link Reader} that eagerly drains another reader, runs the complete text
 * through {@code TextProcess.processText}, and then serves the processed
 * characters.
 *
 * <p>All of the source text is consumed in the constructor; the wrapped reader
 * is not closed by this class.
 */
public class ProcessedStringReader extends java.io.Reader {

    private static final int BUFFER_SIZE = 1024 * 8;

    // Shared, lazily created processor.
    // NOTE(review): the lazy initialisation is not synchronized; confirm
    // whether this class can be constructed from several threads at once.
    private static TextProcess m_textProcess = null;

    /** Processed characters; {@code null} once the reader has been closed. */
    private char[] m_inputData = null;
    /** Index in {@link #m_inputData} of the next character to hand out. */
    private int m_offset = 0;
    /** Number of valid characters in {@link #m_inputData}. */
    private int m_length = 0;

    /**
     * Reads {@code input} to exhaustion and stores the processed text.
     *
     * @param input the reader supplying the raw text
     * @throws IllegalStateException if the shared {@code TextProcess} cannot
     *         be created
     */
    public ProcessedStringReader(Reader input) {
        char[] chunk = new char[BUFFER_SIZE];
        StringBuilder raw = new StringBuilder();
        int numChars;

        try {
            while ((numChars = input.read(chunk, 0, chunk.length)) > 0) {
                raw.append(chunk, 0, numChars);
            }
        } catch (IOException e) {
            // Best effort, as before: continue with whatever was read so far.
            e.printStackTrace();
        }

        if (m_textProcess == null) {
            try {
                m_textProcess = new TextProcess();
            } catch (IOException e) {
                // Previously swallowed, which led to a NullPointerException
                // on the processText call below; fail with the real cause.
                throw new IllegalStateException("Unable to create TextProcess", e);
            }
        }

        m_inputData = m_textProcess.processText(raw.toString()).toCharArray();
        m_offset = 0;
        m_length = m_inputData.length;
    }

    /**
     * Copies up to {@code len} processed characters into {@code cbuf},
     * starting at index {@code off} of {@code cbuf}.
     *
     * @param cbuf destination buffer
     * @param off  index in {@code cbuf} at which to start storing characters
     * @param len  maximum number of characters to copy
     * @return the number of characters copied, {@code 0} if {@code len} is
     *         zero, or {@code -1} when no characters remain
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or {@code len} exceeds {@code cbuf.length - off}
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        if (len == 0) {
            return 0;
        }

        int remaining = m_length - m_offset;
        if (remaining <= 0) {
            // Also reached after close(), which resets the length to zero.
            return -1;
        }

        // 'off' is an offset into the destination only; the source position
        // is tracked solely by m_offset.
        int count = Math.min(len, remaining);
        System.arraycopy(m_inputData, m_offset, cbuf, off, count);
        m_offset += count;
        return count;
    }

    /**
     * Releases the processed text. Subsequent reads return {@code -1}.
     */
    @Override
    public void close() throws IOException {
        m_inputData = null;
        m_offset = 0;
        m_length = 0;
    }

}

schema.xml 中的配置:

<!-- Text field type whose index-time and query-time analyzers both tokenize with
     com.fosp.searchengine.JvnTextProTokenizerFactory and then lower-case the tokens. -->
<fieldType name="text_jvnTextPro" class="solr.TextField" positionIncrementGap="100"> 
    <!-- Analysis chain applied when documents are indexed. -->
    <analyzer type="index"> 
     <tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/> 
    <filter class="solr.LowerCaseFilterFactory"/> 
    </analyzer> 
    <!-- Analysis chain applied to query strings; same components as the index chain. -->
    <analyzer type="query"> 
     <tokenizer class="com.fosp.searchengine.JvnTextProTokenizerFactory"/> 
    <filter class="solr.LowerCaseFilterFactory"/>  
    </analyzer> 
</fieldType> 

回答

0

这里没有什么问题。由工厂实例化的分词器对象会被重复使用,而在 admin/analysis 页面中情况并非如此——这就是两者的不同之处。