2014-01-08 51 views
0

我正在尝试为由多个单词组成的短语实现自动完成功能。问题标题:Elasticsearch 多个单词的“以……开头”匹配。

我希望只匹配单词的开头(edgeNGram?),并且对搜索词中的每个单词都如此。例如,如果我搜索“monitor”,应该得到所有包含 monitor 这个单词的短语;但如果我搜索“onitor”,则不应在下面的数据集中找到任何匹配。另外,搜索“mon ap”应该返回例如“APNEA MONITOR- SCHULTE Vital Signs Monitor”,而搜索“mon rrr”应该没有结果。

所以我的问题是我应该如何去实施它?

因此简而言之:匹配短语应包含以搜索条件开头的单词。

这里是我的映射:

{
  "quicksearch2": {
    "results": {
      "properties": {
        "phrase": {
          "type": "string",
          "index_analyzer": "quicksearch_index_analyzer",
          "search_analyzer": "quicksearch_search_analyzer"
        }
      }
    }
  }
}

这里是我的设置:

{
  "quicksearch2": {
    "settings": {
      "index.number_of_shards": "1",
      "index.number_of_replicas": "0",
      "index.version.created": "900899",
      "index.uuid": "Lb7vC-eHQB-u_Okm3ERLow",
      "index.analysis.filter.left_ngram.type": "edgeNGram",
      "index.analysis.filter.left_ngram.side": "front",
      "index.analysis.filter.left_ngram.max_gram": "20",
      "index.analysis.analyzer.quicksearch_index_analyzer.type": "custom",
      "index.analysis.analyzer.quicksearch_index_analyzer.tokenizer": "keyword",
      "index.analysis.analyzer.quicksearch_index_analyzer.filter.0": "trim",
      "index.analysis.analyzer.quicksearch_index_analyzer.filter.1": "lowercase",
      "index.analysis.analyzer.quicksearch_index_analyzer.filter.2": "asciifolding",
      "index.analysis.analyzer.quicksearch_index_analyzer.filter.3": "unique",
      "index.analysis.analyzer.quicksearch_index_analyzer.filter.4": "left_ngram",
      "index.analysis.analyzer.quicksearch_search_analyzer.type": "custom",
      "index.analysis.analyzer.quicksearch_search_analyzer.tokenizer": "keyword",
      "index.analysis.analyzer.quicksearch_search_analyzer.filter.0": "trim",
      "index.analysis.analyzer.quicksearch_search_analyzer.filter.1": "lowercase",
      "index.analysis.analyzer.quicksearch_search_analyzer.filter.2": "asciifolding",
      "index.analysis.analyzer.quicksearch_search_analyzer.filter.3": "unique"
    }
  }
}

这里是我的查询:

query: { 
    match: { 
     phrase: { 
      query: term, 
      operator: 'and' 
     } 
} 

一些样本数据:

{
  "took": 133,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 6197,
    "max_score": 1.491863,
    "hits": [
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "emCydgTfQwuKkl4sSZoosQ",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Apnea Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "AXCO5rUxRwC9SebXcQxXeQ",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Apnea Monitor, Neonatal"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "tjJq3klPTsmP8akOc18Htw",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Apnea Monitor, Recording"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "-FjKWxl9Rm6-byn-wlpoIw",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Cardiorespiratory Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "Q19k6V6VQ6ulZOLCfESQ6w",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Impedance Pneumograph Bedside Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "YLI1er3cRjSyGumWNVi0pg",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Impedance Pneumograph Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "n5j1SaXeS2W6NymaYAYD6A",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Neonatal Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "U7Q5XrrHRbKOIwfRWO6RTQ",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Pulmonary Function Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "aF_THiCKRIyzunCbBxJTEg",
        "_score": 1.491863,
        "fields": {
          "phrase": "APNEA MONITOR- SCHULTE Vital Signs Monitor"
        }
      },
      {
        "_index": "quicksearch2",
        "_type": "results",
        "_id": "8BAjZfwMQjWmrkqCO7o6gg",
        "_score": 1.491863,
        "fields": {
          "phrase": "P.P.M. - PORTABLE PRECISION MONITOR Gas Monitor, Atmospheric"
        }
      }
    ]
  }
}
+0

你读过这个吗? http://jontai.me/blog/2013/02/adding-autocomplete-to-an-elasticsearch-search-application/ – phoet

回答

0

把分词器(索引和搜索两处)从 keyword 改为 standard 似乎就解决了问题。

1

我不太确定你现有的做法为什么不起作用,但下面这种方法似乎可以实现你想要的效果。

我创建了这些设置索引:

# Create test_index with two custom analyzers for the `phrase` field:
#   - my_ngram_analyzer (index time): whitespace tokens, lowercased,
#     ASCII-folded, then expanded into front edge n-grams of 2..20 chars.
#   - my_whitespace_analyzer (search time): same pipeline without n-grams,
#     so each typed word is compared against the indexed word prefixes.
# `token_chars` is a parameter of the edge n-gram *tokenizer*, not of the
# token filter, so it is intentionally not set on the filter below.
curl -XPUT "http://localhost:9200/test_index" -d'
{
    "settings": {
        "analysis": {
            "filter": {
                "my_edge_ngram_filter": {
                    "type": "edgeNGram",
                    "min_gram": 2,
                    "max_gram": 20
                }
            },
            "analyzer": {
                "my_ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "whitespace",
                    "filter": [
                        "lowercase",
                        "asciifolding",
                        "my_edge_ngram_filter"
                    ]
                },
                "my_whitespace_analyzer": {
                    "type": "custom",
                    "tokenizer": "whitespace",
                    "filter": [
                        "lowercase",
                        "asciifolding"
                    ]
                }
            }
        }
    },
    "mappings": {
        "docs": {
            "properties": {
                "phrase": {
                    "type": "string",
                    "index_analyzer": "my_ngram_analyzer",
                    "search_analyzer": "my_whitespace_analyzer"
                }
            }
        }
    }
}'

然后添加你列出的文档:

# Bulk-index the ten sample phrases into test_index (type "docs").
# The body is newline-delimited JSON: one action line followed by one
# document line per entry, and it must end with a newline.
curl -XPOST "http://localhost:9200/test_index/_bulk" -d'
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "1" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "2" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor, Neonatal" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "3" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Apnea Monitor, Recording" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "4" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Cardiorespiratory Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "5" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Impedance Pneumograph Bedside Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "6" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Impedance Pneumograph Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "7" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Neonatal Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "8" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Pulmonary Function Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "9" } }
{ "phrase" : "APNEA MONITOR- SCHULTE Vital Signs Monitor" }
{ "index" : { "_index" : "test_index", "_type" : "docs", "_id" : "10" } }
{ "phrase" : "P.P.M. - PORTABLE PRECISION MONITOR Gas Monitor, Atmospheric" }
'

下面的搜索似乎会返回你期望的结果:

# Match query on `phrase` for "monitor"; every query term must match.
curl --request POST "http://localhost:9200/test_index/_search" --data '
{
  "query": {
    "match": {
      "phrase": { "query": "monitor", "operator": "and" }
    }
  }
}'

返回所有文档,

# Match query on `phrase` for "onitor"; every query term must match.
curl --request POST "http://localhost:9200/test_index/_search" --data '
{
  "query": {
    "match": {
      "phrase": { "query": "onitor", "operator": "and" }
    }
  }
}'

不返回任何结果,而

# Match query on `phrase` for "mon ap"; every query term must match.
curl --request POST "http://localhost:9200/test_index/_search" --data '
{
  "query": {
    "match": {
      "phrase": { "query": "mon ap", "operator": "and" }
    }
  }
}'

返回除文档 "10" 之外的所有文档。

这是一个可运行的示例,你可以自己动手试验(需要有 ES 运行在本机 localhost:9200 上,或者提供其他端点):http://sense.qbox.io/gist/19fdcdb20c24436c64b7656c3b8002fe78667b12

相关问题