I built an index over my database rows (each row is one document); the rows are Unicode in MySQL (i.e. character set: utf8, collation: utf8_bin). But when I search for any word, English or non-English, it returns no results. It just says:

0 total matching documents

The only difference between my code and the Lucene demo code is that I changed the field names to the column names I index. In any case, it prints this message before it even reaches that part of the code. I also changed the encoding used to read the query to UTF-8.

I checked the database-reading part; it is fine.

So what is the problem?

In case it helps, here is my indexing code:

static void indexDocs(IndexWriter writer, Connection conn) throws SQLException, CorruptIndexException, IOException { 
    String sql = "select id, name, description, text from users"; 
    Statement stmt = conn.createStatement(); 

    ResultSet rs = stmt.executeQuery(sql); 
    while (rs.next()) { 
     Document d = new Document(); 
     d.add(new Field("id", rs.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED)); 
     d.add(new Field("name", rs.getString("name"), Field.Store.NO, Field.Index.NOT_ANALYZED)); 
     String tmp = rs.getString("description"); 
     if (tmp == null) { 
      tmp = ""; 
     } 
     d.add(new Field("description", tmp, Field.Store.NO, Field.Index.ANALYZED)); 
     tmp = rs.getString("text"); 
     if (tmp == null) { 
      tmp = ""; 
     } 
     d.add(new Field("text", tmp, Field.Store.NO, Field.Index.ANALYZED)); 
     writer.addDocument(d); 
    } 
} 
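
For reference, a minimal driver for this method might look like the sketch below. The JDBC URL, credentials, index path, and class placement are assumptions on my part; note the characterEncoding=UTF-8 connection parameter, which makes the MySQL driver decode utf8 columns into proper Java strings:

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

// hypothetical driver; assumes indexDocs() above lives in this same class
public class Index {
    public static void main(String[] args) throws Exception {
     // characterEncoding=UTF-8 so utf8 columns arrive as correct Java strings
     Connection conn = DriverManager.getConnection(
       "jdbc:mysql://localhost:3306/mydb?useUnicode=true&characterEncoding=UTF-8",
       "user", "password");

     IndexWriter writer = new IndexWriter(
       FSDirectory.open(new File("index")),
       new StandardAnalyzer(Version.LUCENE_CURRENT),
       true,                                 // create a new index
       IndexWriter.MaxFieldLength.UNLIMITED);

     indexDocs(writer, conn);
     writer.optimize();
     writer.close();
     conn.close();
    }
}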

And here is my search code:

import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.IOException; 
import java.io.InputStreamReader; 
import java.util.Date; 

import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.index.FilterIndexReader; 
import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.queryParser.QueryParser; 
import org.apache.lucene.search.Collector; 
import org.apache.lucene.search.IndexSearcher; 
import org.apache.lucene.search.Query; 
import org.apache.lucene.search.ScoreDoc; 
import org.apache.lucene.search.Scorer; 
import org.apache.lucene.search.Searcher; 
import org.apache.lucene.search.TopScoreDocCollector; 
import org.apache.lucene.store.FSDirectory; 
import org.apache.lucene.util.Version; 

/** Simple command-line based search demo. */ 
public class Search { 

    /** Use the norms from one field for all fields. Norms are read into memory, 
    * using a byte of memory per document per searched field. This can cause 
    * search of large collections with a large number of fields to run out of 
    * memory. If all of the fields contain only a single token, then the norms 
    * are all identical and a single norm vector may be shared. */ 
    private static class OneNormsReader extends FilterIndexReader { 

     private String field; 

     public OneNormsReader(IndexReader in, String field) { 
      super(in); 
      this.field = field; 
     } 

     @Override 
     public byte[] norms(String field) throws IOException { 
      return in.norms(this.field); 
     } 
    } 

    private Search() { 
    } 

    /** Simple command-line based search demo. */ 
    public static void main(String[] args) throws Exception { 
     String usage = 
       "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]"; 
     usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search."; 
     if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { 
      System.out.println(usage); 
      System.exit(0); 
     } 

     String index = "index"; 
     String field = "contents"; 
     String queries = null; 
     int repeat = 0; 
     boolean raw = false; 
     String normsField = null; 
     boolean paging = true; 
     int hitsPerPage = 10; 

     for (int i = 0; i < args.length; i++) { 
      if ("-index".equals(args[i])) { 
       index = args[i + 1]; 
       i++; 
      } else if ("-field".equals(args[i])) { 
       field = args[i + 1]; 
       i++; 
      } else if ("-queries".equals(args[i])) { 
       queries = args[i + 1]; 
       i++; 
      } else if ("-repeat".equals(args[i])) { 
       repeat = Integer.parseInt(args[i + 1]); 
       i++; 
      } else if ("-raw".equals(args[i])) { 
       raw = true; 
      } else if ("-norms".equals(args[i])) { 
       normsField = args[i + 1]; 
       i++; 
      } else if ("-paging".equals(args[i])) { 
       if (args[i + 1].equals("false")) { 
        paging = false; 
       } else { 
        hitsPerPage = Integer.parseInt(args[i + 1]); 
        if (hitsPerPage == 0) { 
         paging = false; 
        } 
       } 
       i++; 
      } 
     } 

     IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only searching, so read-only=true 

     if (normsField != null) { 
      reader = new OneNormsReader(reader, normsField); 
     } 

     Searcher searcher = new IndexSearcher(reader); 
     Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 

     BufferedReader in = null; 
     if (queries != null) { 
      // read the query file as UTF-8 (FileReader would use the platform default encoding) 
      in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); 
     } else { 
      in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); 
     } 
     QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer); 
     while (true) { 
      if (queries == null) // prompt the user 
      { 
       System.out.println("Enter query: "); 
      } 

      String line = in.readLine(); 
      if (line == null) { 
       break; 
      } 
      // re-decode the input as UTF-8; this only helps if the console 
      // really delivers Latin-1 bytes, otherwise it mangles the query 
      line = new String(line.getBytes("8859_1"), "UTF-8"); 

      line = line.trim(); 
      if (line.length() == 0) { 
       break; 
      } 

      Query query = parser.parse(line); 
      System.out.println("Searching for: " + query.toString(field)); 


      if (repeat > 0) {       // repeat & time as benchmark 
       Date start = new Date(); 
       for (int i = 0; i < repeat; i++) { 
        searcher.search(query, null, 100); 
       } 
       Date end = new Date(); 
       System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); 
      } 

      if (paging) { 
       doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null); 
      } else { 
       doStreamingSearch(searcher, query); 
      } 
     } 
     reader.close(); 
    } 

    /** 
    * This method uses a custom HitCollector implementation which simply prints out 
    * the docId and score of every matching document. 
    * 
    * This simulates the streaming search use case, where all hits are supposed to 
    * be processed, regardless of their relevance. 
    */ 
    public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException { 
     Collector streamingHitCollector = new Collector() { 

      private Scorer scorer; 
      private int docBase; 

      // print the index-wide docId (segment docBase + segment-local doc) and the score 
      @Override 
      public void collect(int doc) throws IOException { 
       System.out.println("doc=" + (docBase + doc) + " score=" + scorer.score()); 
      } 

      @Override 
      public boolean acceptsDocsOutOfOrder() { 
       return true; 
      } 

      @Override 
      public void setNextReader(IndexReader reader, int docBase) 
        throws IOException { 
       this.docBase = docBase; 
      } 

      @Override 
      public void setScorer(Scorer scorer) throws IOException { 
       this.scorer = scorer; 
      } 
     }; 

     searcher.search(query, streamingHitCollector); 
    } 

    /** 
    * This demonstrates a typical paging search scenario, where the search engine presents 
    * pages of size n to the user. The user can then go to the next page if interested in 
    * the next hits. 
    * 
    * When the query is executed for the first time, then only enough results are collected 
    * to fill 5 result pages. If the user wants to page beyond this limit, then the query 
    * is executed another time and all hits are collected. 
    * 
    */ 
    public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, 
      int hitsPerPage, boolean raw, boolean interactive) throws IOException { 

     // Collect enough docs to show 5 pages 
     TopScoreDocCollector collector = TopScoreDocCollector.create(
       5 * hitsPerPage, false); 
     searcher.search(query, collector); 
     ScoreDoc[] hits = collector.topDocs().scoreDocs; 

     int numTotalHits = collector.getTotalHits(); 
     System.out.println(numTotalHits + " total matching documents"); 

     int start = 0; 
     int end = Math.min(numTotalHits, hitsPerPage); 

     while (true) { 
      if (end > hits.length) { 
       System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); 
       System.out.println("Collect more (y/n) ?"); 
       String line = in.readLine(); 
       if (line == null || line.length() == 0 || line.charAt(0) == 'n') { 
        break; 
       } 

       collector = TopScoreDocCollector.create(numTotalHits, false); 
       searcher.search(query, collector); 
       hits = collector.topDocs().scoreDocs; 
      } 

      end = Math.min(hits.length, start + hitsPerPage); 

      for (int i = start; i < end; i++) { 
       if (raw) {        // output raw format 
        System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); 
        continue; 
       } 

       Document doc = searcher.doc(hits[i].doc); 
       String id = doc.get("id"); 
       if (id != null) { 
        System.out.println((i + 1) + ". " + id); 
        String name = doc.get("name"); 
        if (name != null) { 
         System.out.println(" name: " + doc.get("name")); 
        } 
        String description = doc.get("description"); 
        if (description != null) { 
         System.out.println(" description: " + doc.get("description")); 
        } 
        String text= doc.get("text"); 
        if (text != null) { 
         System.out.println(" text: " + doc.get("text")); 
        } 
       } else { 
        System.out.println((i + 1) + ". " + "No path for this document"); 
       } 

      } 

      if (!interactive) { 
       break; 
      } 

      if (numTotalHits >= end) { 
       boolean quit = false; 
       while (true) { 
        System.out.print("Press "); 
        if (start - hitsPerPage >= 0) { 
         System.out.print("(p)revious page, "); 
        } 
        if (start + hitsPerPage < numTotalHits) { 
         System.out.print("(n)ext page, "); 
        } 
        System.out.println("(q)uit or enter number to jump to a page."); 

        String line = in.readLine(); 
        if (line == null || line.length() == 0 || line.charAt(0) == 'q') { 
         quit = true; 
         break; 
        } 
        if (line.charAt(0) == 'p') { 
         start = Math.max(0, start - hitsPerPage); 
         break; 
        } else if (line.charAt(0) == 'n') { 
         if (start + hitsPerPage < numTotalHits) { 
          start += hitsPerPage; 
         } 
         break; 
        } else { 
         int page = Integer.parseInt(line); 
         if ((page - 1) * hitsPerPage < numTotalHits) { 
          start = (page - 1) * hitsPerPage; 
          break; 
         } else { 
          System.out.println("No such page"); 
         } 
        } 
       } 
       if (quit) { 
        break; 
       } 
       end = Math.min(numTotalHits, start + hitsPerPage); 
      } 

     } 

    } 
} 

Thanks.

Answer

I found it. I have to specify which column I want to search. For example, to search in the text field, the query should be: "text:MyWord".
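
For completeness, the same fix expressed in code: either make the right column the parser's default field, or search several columns at once with MultiFieldQueryParser. This is a sketch under the same Lucene 3.x API as above; the field names match the indexing code, everything else is illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class QueryExamples {
    public static void main(String[] args) throws Exception {
     StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

     // option 1: make "text" the default field, so no prefix is needed
     QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "text", analyzer);
     Query q1 = parser.parse("MyWord");        // same as parsing "text:MyWord"

     // option 2: search both analyzed columns at once
     MultiFieldQueryParser multi = new MultiFieldQueryParser(
       Version.LUCENE_CURRENT,
       new String[] {"description", "text"},
       analyzer);
     Query q2 = multi.parse("MyWord");

     System.out.println(q1.toString());        // e.g. text:myword
     System.out.println(q2.toString());
    }
}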