2013-02-20 71 views
0

我刚把代码库从 Lucene 3.6 升级到 Lucene 4.1,之前在分析器中用 NormalizeCharMap 替换字符的测试似乎不再起作用:在 Lucene 4.1 下,使用带 MappingCharFilter 的分析器进行搜索时得不到任何匹配。

下面我创建了一个自包含的测试用例,运行时的输出如下:

--term=and-- 
--term=gold-- 
--term=platinum-- 
name:"platinum and gold" 
Size1 
name:"platinum & gold" 
Size0 

java.lang.AssertionError: 
Expected :1 
Actual :0 
<Click to see difference> 
    at org.junit.Assert.fail(Assert.java:93) 
    at org.junit.Assert.failNotEquals(Assert.java:647) 
    at org.junit.Assert.assertEquals(Assert.java:128) 
    at org.junit.Assert.assertEquals(Assert.java:472) 
    at org.junit.Assert.assertEquals(Assert.java:456) 
    at org.musicbrainz.search.analysis.Lucene41CharFilterTest. 
    testAmpersandSearching(Lucene41CharFilterTest.java:89) 

正如你所看到的,CharFilter 在索引阶段是生效的,因为文本 "platinum & gold" 被转换成了三个词项 "platinum"、"and"、"gold"。实际上搜索 "platinum and gold" 也能命中,但搜索原始的 "platinum & gold" 却不行——即使索引和搜索使用的是同一个分析器。

package org.musicbrainz.search.analysis; 

import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.TokenStream; 
import org.apache.lucene.analysis.Tokenizer; 
import org.apache.lucene.analysis.charfilter.MappingCharFilter; 
import org.apache.lucene.analysis.charfilter.NormalizeCharMap; 
import org.apache.lucene.analysis.core.LowerCaseFilter; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; 
import org.apache.lucene.index.*; 
import org.apache.lucene.queryparser.classic.QueryParser; 
import org.apache.lucene.search.IndexSearcher; 
import org.apache.lucene.search.Query; 
import org.apache.lucene.search.TopDocs; 
import org.apache.lucene.store.RAMDirectory; 
import org.apache.lucene.util.BytesRef; 
import org.apache.lucene.util.Version; 
import org.junit.Test; 
import java.io.Reader; 

import static org.junit.Assert.assertEquals; 

// Self-contained repro: a MappingCharFilter ("&" -> "and") wrapped inside
// createComponents() affects indexing, but is NOT applied when QueryParser
// analyzes query text, so the phrase query "platinum & gold" gets 0 hits.
public class Lucene41CharFilterTest 
{ 
    // Analyzer that maps "&" to "and" before tokenizing, then lower-cases.
    class SimpleAnalyzer extends Analyzer { 

     protected NormalizeCharMap charConvertMap; 

     // Builds the character-replacement map used by the MappingCharFilter.
     protected void setCharConvertMap() { 

      NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); 
      builder.add("&","and"); 
      charConvertMap = builder.build(); 
     } 

     public SimpleAnalyzer() { 
      setCharConvertMap(); 
     } 

     @Override 
     protected TokenStreamComponents createComponents(String fieldName, 
                 Reader reader) { 
      // NOTE(review): wrapping the raw reader here only covers callers that
      // go through createComponents() with this reader (indexing). In
      // Lucene 4.x, QueryParser pre-processes the reader via
      // Analyzer.initReader(), which is not overridden here -- that is why
      // the "&" query below fails (see the accepted answer further down).
      Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41, 

        new MappingCharFilter(charConvertMap, reader)); 
      TokenStream filter = new LowerCaseFilter(Version.LUCENE_41,source); 
      return new TokenStreamComponents(source, filter); 
     } 
    } 

    @Test 
    public void testAmpersandSearching() throws Exception { 

     Analyzer analyzer = new SimpleAnalyzer(); 
     RAMDirectory dir = new RAMDirectory(); 
     IndexWriterConfig writerConfig = new 
      IndexWriterConfig(Version.LUCENE_41,analyzer); 
     IndexWriter writer = new IndexWriter(dir, writerConfig); 
     { 
      // Index a single document containing the literal ampersand.
      Document doc = new Document(); 
      doc.add(new Field("name", "platinum & gold", Field.Store.YES, 
        Field.Index.ANALYZED)); 
      writer.addDocument(doc); 
     } 
     writer.close(); 

     // Dump the indexed terms: prints "and", "gold", "platinum" -- proof
     // that the char filter DID run at index time.
     IndexReader ir = DirectoryReader.open(dir); 
     Fields fields = MultiFields.getFields(ir); 
     Terms terms = fields.terms("name"); 
     TermsEnum termsEnum = terms.iterator(null); 
     BytesRef text; 
     while((text = termsEnum.next()) != null) { 
      System.out.println("--term=" + text.utf8ToString()+"--"); 
     } 
     ir.close(); 

     // Phrase query spelled with "and" matches (1 hit)...
     IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir)); 
     { 
      Query q = new QueryParser(Version.LUCENE_41, "name", analyzer) 
       .parse("\"platinum and gold\""); 
      System.out.println(q); 
      TopDocs td = searcher.search(q, 10); 
      System.out.println("Size"+td.scoreDocs.length); 
      assertEquals(1, searcher.search(q, 10).totalHits); 
     } 

     // ...but the same phrase spelled with "&" gets 0 hits: QueryParser
     // analyzed the query text without the MappingCharFilter applied, so
     // this assertion fails (Expected: 1, Actual: 0 in the output above).
     searcher = new IndexSearcher(IndexReader.open(dir)); 
     { 
      Query q = new QueryParser(Version.LUCENE_41, "name", analyzer) 
       .parse("\"platinum & gold\""); 
      System.out.println(q); 
      TopDocs td = searcher.search(q, 10); 
      System.out.println("Size"+td.scoreDocs.length); 
      assertEquals(1, searcher.search(q, 10).totalHits); 
     } 
    } 
} 

回答

0

我发现问题所在了:需要覆盖新增的 initReader() 方法,这样当分析器与 QueryParser 一起使用时字符过滤器才会被应用。不确定是否还需要在 createComponents() 中构造该过滤器,但只要把它加到 initReader() 中,测试就通过了。

// Fixed analyzer: the "&" -> "and" mapping is applied in initReader(), the
// hook Lucene 4.x runs for EVERY consumer of the analyzer -- including
// QueryParser -- so indexing and query parsing now see identical text.
class SimpleAnalyzer extends Analyzer { 

     protected NormalizeCharMap charConvertMap; 

     // Builds the character-replacement map consumed by initReader().
     protected void setCharConvertMap() { 

      NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); 
      builder.add("&","and"); 
      charConvertMap = builder.build(); 
     } 

     public SimpleAnalyzer() { 
      setCharConvertMap(); 
     } 

     @Override 
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) { 
      // Fix: use LUCENE_41 match versions (the original answer left stale
      // LUCENE_35 constants behind), consistent with the Lucene 4.1
      // IndexWriterConfig/QueryParser used elsewhere in this test.
      Tokenizer source = new MusicbrainzTokenizer(Version.LUCENE_41, 
        reader); 
      TokenStream filter = new LowerCaseFilter(Version.LUCENE_41,source); 
      return new TokenStreamComponents(source, filter); 
     } 

     // Wrap the incoming reader with the char filter here, not in
     // createComponents(): Analyzer.initReader() is invoked on every code
     // path (indexing AND query parsing), which is what makes the
     // "platinum & gold" phrase query match.
     @Override 
     protected Reader initReader(String fieldName, 
            Reader reader) 
     { 
      return new MappingCharFilter(charConvertMap, reader); 
     } 
    }