2017-03-02 45 views
0

ElasticSearch NEST 2.4 中的自定义“tab”分词器:我有一个包含许多字段的索引,其中一个字段“ServiceCategories”的数据类似这样:

|Case Management|Developmental Disabilities

我需要按分隔符“|”拆分这些数据,我尝试用下面的代码来实现:

// First attempt: describes an index named after the lower-cased _DataSource, with an
// explicit mapping for ProviderContent and a custom analyzer for ServiceCategories.
var descriptor = new CreateIndexDescriptor(_DataSource.ToLower()) 
     .Mappings(ms => ms 
      .Map<ProviderContent>(m => m 
       .AutoMap() 
       .Properties(p => p 
        // OrganizationName also gets a not-analyzed "raw" sub-field.
        .String(s => s 
         .Name(n => n.OrganizationName) 
         .Fields(f => f 
          .String(ss => ss.Name("raw").NotAnalyzed()))) 
        // ServiceCategories is analyzed with the custom analyzer defined in Settings below.
        .String(s => s 
         .Name(n => n.ServiceCategories) 
         .Analyzer("tab_delim_analyzer")) 
        .GeoPoint(g => g.Name(n => n.Location).LatLon(true))))) 
     .Settings(st => st 
      .Analysis(an => an 
       .Analyzers(anz => anz 
        // Custom analyzer: "tab_delim_tokenizer" followed by the "lowercase" token filter.
        .Custom("tab_delim_analyzer", td => td 
         .Filters("lowercase") 
        .Tokenizer("tab_delim_tokenizer"))) 
       .Tokenizers(t => t 
        // NOTE(review): the "|" pattern is passed unescaped here; the Analyze output for
        // this analyzer shows single-character tokens instead of a split on "|".
        .Pattern("tab_delim_tokenizer", tdt => tdt 
         .Pattern("|"))))); 
    // Sends the create-index request through the project's client wrapper.
    _elasticClientWrapper.CreateIndex(descriptor); 

我针对 ServiceCategories(在 ES 中为 serviceCategories)的搜索代码使用了一个简单的 TermQuery,查询值设置为小写。

使用这个搜索参数时得不到任何结果(其他参数工作正常)。预期的结果是至少与上述词项之一完全匹配。

我也尝试过改用 classic 分词器来实现:

// Second attempt: same mapping shape, but ServiceCategories is indexed with a custom
// analyzer built on the "classic" tokenizer and searched with the "standard" analyzer.
var descriptor = new CreateIndexDescriptor(_DataSource.ToLower()) 
     .Mappings(ms => ms 
      .Map<ProviderContent>(m => m 
       .AutoMap() 
       .Properties(p => p 
        // OrganizationName also gets a not-analyzed "raw" sub-field.
        .String(s => s 
         .Name(n => n.OrganizationName) 
         .Fields(f => f 
          .String(ss => ss.Name("raw").NotAnalyzed()))) 
        // Index-time and search-time analyzers are configured separately for this field.
        .String(s => s 
         .Name(n => n.ServiceCategories) 
         .Analyzer("classic_tokenizer") 
         .SearchAnalyzer("standard")) 
        .GeoPoint(g => g.Name(n => n.Location).LatLon(true))))) 
     .Settings(s => s 
      .Analysis(an => an 
       // The custom analyzer named "classic_tokenizer" configures only a tokenizer ("classic").
       .Analyzers(a => a.Custom("classic_tokenizer", ca => ca 
        .Tokenizer("classic"))))); 

这同样不起作用。有人能帮我找出哪里出错了吗?

这里的搜索请求:

### ES REQUEST ### 
{ 
    "from": 0, 
    "size": 10, 
    "sort": [ 
    { 
     "organizationName": { 
     "order": "asc" 
     } 
    } 
    ], 
    "query": { 
    "bool": { 
     "must": [ 
     { 
      "match_all": {} 
     }, 
     { 
      "term": { 
      "serviceCategories": { 
       "value": "developmental disabilities" 
      } 
      } 
     } 
     ] 
    } 
    } 
} 

回答

1

您的 tab_delim_tokenizer 模式很接近,但并不完全正确 :) 要看清这一点,最简单的方法是使用 Analyze API 来了解分析器会如何对一段文本进行分词。使用您的第一个映射,我们可以检查自定义分析器实际做了什么:

// Runs the index's "tab_delim_analyzer" over a sample value so the tokens it
// produces can be inspected.
client.Analyze(a => a 
    .Index(_DataSource.ToLower()) 
    .Analyzer("tab_delim_analyzer") 
    .Text("|Case Management|Developmental Disabilities") 
); 

返回(为简洁起见有所删减)

{ 
    "tokens" : [ { 
    "token" : "|", 
    "start_offset" : 0, 
    "end_offset" : 1, 
    "type" : "word", 
    "position" : 0 
    }, { 
    "token" : "c", 
    "start_offset" : 1, 
    "end_offset" : 2, 
    "type" : "word", 
    "position" : 1 
    }, { 
    "token" : "a", 
    "start_offset" : 2, 
    "end_offset" : 3, 
    "type" : "word", 
    "position" : 2 
    }, { 
    "token" : "s", 
    "start_offset" : 3, 
    "end_offset" : 4, 
    "type" : "word", 
    "position" : 3 
    }, ... ] 
} 

这证明 tab_delim_tokenizer 并没有按我们预期的方式进行分词。原因在于分词器的模式是一个正则表达式,而未转义的 | 在正则表达式中表示“或”,因此文本被逐字符拆分。只需一个小改动即可修复此问题:在模式中用 \ 转义 |,并在字符串前加上 @ 前缀,使该模式成为逐字字符串字面量。

这里有一个完整的例子

void Main()
{
    // Single-node client against a local cluster; calls that do not name an
    // index fall back to indexName.
    var indexName = "default-index";
    var connectionSettings = new ConnectionSettings(
            new SingleNodeConnectionPool(new Uri("http://localhost:9200")))
        .DefaultIndex(indexName);
    var client = new ElasticClient(connectionSettings);

    // Start from a clean index on every run.
    if (client.IndexExists(indexName).Exists)
    {
        client.DeleteIndex(indexName);
    }

    // Mapping for ProviderContent plus the analysis chain used by ServiceCategories:
    // a pattern tokenizer splitting on a literal '|' followed by the lowercase filter.
    var createIndex = new CreateIndexDescriptor(indexName)
        .Mappings(mappings => mappings
            .Map<ProviderContent>(typeMap => typeMap
                .AutoMap()
                .Properties(props => props
                    .String(orgName => orgName
                        .Name(doc => doc.OrganizationName)
                        .Fields(subFields => subFields
                            .String(raw => raw.Name("raw").NotAnalyzed())))
                    .String(categories => categories
                        .Name(doc => doc.ServiceCategories)
                        .Analyzer("tab_delim_analyzer"))
                    .GeoPoint(geo => geo
                        .Name(doc => doc.Location)
                        .LatLon(true)))))
        .Settings(indexSettings => indexSettings
            .Analysis(analysis => analysis
                .Analyzers(analyzers => analyzers
                    .Custom("tab_delim_analyzer", custom => custom
                        .Filters("lowercase")
                        .Tokenizer("tab_delim_tokenizer")))
                .Tokenizers(tokenizers => tokenizers
                    // Verbatim string so the backslash reaches the regex engine and
                    // '|' is matched literally.
                    .Pattern("tab_delim_tokenizer", pattern => pattern
                        .Pattern(@"\|")))));

    client.CreateIndex(createIndex);

    // Sanity-check the custom analyzer against a representative value.
    client.Analyze(analyze => analyze
        .Index(indexName)
        .Analyzer("tab_delim_analyzer")
        .Text("|Case Management|Developmental Disabilities"));

    // Index one document and refresh so it is searchable straight away.
    var document = new ProviderContent
    {
        OrganizationName = "Elastic",
        ServiceCategories = "|Case Management|Developmental Disabilities"
    };
    client.Index(document, idx => idx.Refresh());

    // Look the document up with a term query; the unary + wraps it in a bool
    // filter clause because scoring is (probably) not needed.
    client.Search<ProviderContent>(search => search
        .From(0)
        .Size(10)
        .Sort(sort => sort.Ascending(doc => doc.OrganizationName))
        .Query(query => +query
            .Term(doc => doc.ServiceCategories, "developmental disabilities")));
}

/// <summary>
/// Document type indexed and searched by the example above.
/// </summary>
public class ProviderContent 
{ 
    /// <summary>Organization name; the example sorts search results on this field.</summary>
    public string OrganizationName { get; set; } 

    /// <summary>
    /// Pipe-delimited category list, e.g. "|Case Management|Developmental Disabilities".
    /// </summary>
    public string ServiceCategories { get; set; } 

    /// <summary>Location of the provider; mapped as a geo point in the example.</summary>
    public GeoLocation Location { get; set; } 
} 

搜索结果返回

{ 
    "took" : 2, 
    "timed_out" : false, 
    "_shards" : { 
    "total" : 5, 
    "successful" : 5, 
    "failed" : 0 
    }, 
    "hits" : { 
    "total" : 1, 
    "max_score" : null, 
    "hits" : [ { 
     "_index" : "default-index", 
     "_type" : "providercontent", 
     "_id" : "AVqNNqlQpAW_5iHrnIDQ", 
     "_score" : null, 
     "_source" : { 
     "organizationName" : "Elastic", 
     "serviceCategories" : "|Case Management|Developmental Disabilities" 
     }, 
     "sort" : [ "elastic" ] 
    } ] 
    } 
} 
+0

完美又简单!我为这个问题绞尽脑汁好一阵子了!谢谢。最后一个问题……Analyze——它返回的对象是什么?这样我就知道将来如何最好地处理它。 – Michael

+0

@Michael 不确定你的意思——你是问 `.Analyze()` 方法调用返回什么吗?它返回一个 `IAnalyzeResponse` –