2016-09-22 71 views
0

在试用 AWS Elasticsearch(2.3)时,我使用了一些示例数据 https://www.elastic.co/guide/en/kibana/3.0/snippets/shakespeare.json ,其映射如下。问题:Elasticsearch 的 terms 聚合返回的桶文档数非常低。

$ curl --url "https://my_es_id.us-east-1.es.amazonaws.com/shakespeare/_mapping" 

{ 
    "shakespeare": { 
     "mappings": { 
      "act": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "fields": { 
          "raw": { 
           "index": "not_analyzed", 
           "type": "string" 
          } 
         }, 
         "type": "string" 
        }, 
        "speaker": { 
         "fields": { 
          "raw": { 
           "index": "not_analyzed", 
           "type": "string" 
          } 
         }, 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      }, 
      "line": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "type": "string" 
        }, 
        "speaker": { 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      }, 
      "scene": { 
       "properties": { 
        "line_id": { 
         "type": "integer" 
        }, 
        "line_number": { 
         "type": "string" 
        }, 
        "play_name": { 
         "type": "string" 
        }, 
        "speaker": { 
         "type": "string" 
        }, 
        "speech_number": { 
         "type": "integer" 
        }, 
        "text_entry": { 
         "type": "string" 
        } 
       } 
      } 
     } 
    } 
} 

现在,当我运行一个查询以获取整个数据集中每位说话者(speaker)的计数时,得到以下结果。

$ curl -XPOST "https://my_es_id.us-east-1.es.amazonaws.com/shakespeare/_search" -d' 
{ 
    "aggs" : { 
     "speakers" : { 
      "terms" : { "field" : "speaker.raw"} 
     } 
    } 
}' 

{ 
    "_shards": { 
     "failed": 0, 
     "successful": 5, 
     "total": 5 
    }, 
    "aggregations": { 
     "speakers": { 
      "buckets": [ 
       { 
        "doc_count": 4, 
        "key": "BASTARD" 
       }, 
       { 
        "doc_count": 3, 
        "key": "HAMLET" 
       }, 
       { 
        "doc_count": 3, 
        "key": "KING HENRY VIII" 
       }, 
       { 
        "doc_count": 3, 
        "key": "OF SYRACUSE" 
       }, 
       { 
        "doc_count": 3, 
        "key": "PROSPERO" 
       }, 
       { 
        "doc_count": 3, 
        "key": "WARWICK" 
       }, 
       { 
        "doc_count": 2, 
        "key": "ADRIANO DE ARMADO" 
       }, 
       { 
        "doc_count": 2, 
        "key": "ARCHBISHOP OF YORK" 
       }, 
       { 
        "doc_count": 2, 
        "key": "AUFIDIUS" 
       }, 
       { 
        "doc_count": 2, 
        "key": "BENEDICK" 
       } 
      ], 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 153 
     } 
    }, 
    "hits": { 
     "hits": [ 
      { 
       "_id": "0", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 1, 
        "line_number": "", 
        "play_name": "Henry IV", 
        "speaker": "", 
        "speech_number": "", 
        "text_entry": "ACT I" 
       }, 
       "_type": "act" 
      }, 
      { 
       "_id": "14", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 15, 
        "line_number": "1.1.12", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Did lately meet in the intestine shock" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "19", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 20, 
        "line_number": "1.1.17", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "The edge of war, like an ill-sheathed knife," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "22", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 23, 
        "line_number": "1.1.20", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose soldier now, under whose blessed cross" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "24", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 25, 
        "line_number": "1.1.22", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Forthwith a power of English shall we levy;" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "25", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 26, 
        "line_number": "1.1.23", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose arms were moulded in their mothers womb" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "26", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 27, 
        "line_number": "1.1.24", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "To chase these pagans in those holy fields" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "29", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 30, 
        "line_number": "1.1.27", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "For our advantage on the bitter cross." 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "40", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 41, 
        "line_number": "1.1.38", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Whose worst was, that the noble Mortimer," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "41", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 42, 
        "line_number": "1.1.39", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Leading the men of Herefordshire to fight" 
       }, 
       "_type": "line" 
      } 
     ], 
     "max_score": 1.0, 
     "total": 111396 
    }, 
    "timed_out": false, 
    "took": 28 
} 

聚合桶中的文档数看起来非常低。我期望看到的是如下说话者及其文档数(下面的数字是我对整个数据集逐一统计说话者得到的):

GLOUCESTER 1920 
HAMLET 1582 
IAGO 1161 
FALSTAFF 1117 
KING HENRY V 1086 
BRUTUS 1051 
OTHELLO 928 
MARK ANTONY 927 
KING HENRY VI 917 
DUKE VINCENTIO 909 

我已经花了几个小时在网上搜索这个问题,但仍然无法理解其中的原因。我究竟做错了什么?

回答

0

根本原因在于映射中的错误以及搜索数据的方式。映射只为 doc_type 'act' 设置了 speaker.raw,而本应为 doc_type 'line' 设置;并且搜索不应覆盖所有类型,而应只针对 doc_type 'line'。

详细的解答:

按照该网页上的例子:https://www.elastic.co/guide/en/elasticsearch/guide/current/aggregations-and-analysis.html ,我意识到这个 bug 出在映射上。

之前:

  • 我没有意识到原始数据集有多个doc_types。
  • 在映射中,只有 doc_type 'act' 的字段 'speaker' 带有 not_analyzed 版本。
  • 我在搜索时没有指定任何 doc_type。
  • 我期望结果是对 doc_type 'line' 中的说话者进行分桶,而实际上该 doc_type 根本没有任何 'speaker.raw' 属性。
  • 鉴于此,问题中的计数也是错误的。

之后:

  • 新映射为每个 doc_type(act/scene/line)的字段 'speaker' 添加了一个多字段(multi-field),即 speaker.raw,且为 not_analyzed。
  • 新的搜索正确地只在 doc_type 'line' 中搜索说话者,这才是最初的意图。
  • Elasticsearch 的结果现在与我手动从该数据集中统计得到的计数一致。doc_type 'line' 中前 10 位说话者的当前计数如下:

    GLOUCESTER 1907 HAMLET 1572 IAGO 1153 FALSTAFF 1109 KING HENRY V 1076 BRUTUS 1043 OTHELLO 928 MARK ANTONY 915 KING HENRY VI 909 DUKE VINCENTIO 901

这里是正确的映射:

{ 
    "shakespeare" : { 
    "mappings" : { 
     "line" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     }, 
     "act" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     }, 
     "scene" : { 
     "properties" : { 
      "line_id" : { 
      "type" : "integer" 
      }, 
      "line_number" : { 
      "type" : "string" 
      }, 
      "play_name" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speaker" : { 
      "type" : "string", 
      "fields" : { 
       "raw" : { 
       "type" : "string", 
       "index" : "not_analyzed" 
       } 
      } 
      }, 
      "speech_number" : { 
      "type" : "integer" 
      }, 
      "text_entry" : { 
      "type" : "string" 
      } 
     } 
     } 
    } 
    } 
} 

使用新的映射后,结果看起来是正确的:

curl -XPOST "https://my_es_id/shakespeare/line/_search" -d' 
{ 
    "aggs" : { 
     "speakers" : { 
      "terms" : { "field" : "speaker.raw"} 
     } 
    } 
}' 
{ 
    "_shards": { 
     "failed": 0, 
     "successful": 5, 
     "total": 5 
    }, 
    "aggregations": { 
     "speakers": { 
      "buckets": [ 
       { 
        "doc_count": 1907, 
        "key": "GLOUCESTER" 
       }, 
       { 
        "doc_count": 1572, 
        "key": "HAMLET" 
       }, 
       { 
        "doc_count": 1153, 
        "key": "IAGO" 
       }, 
       { 
        "doc_count": 1109, 
        "key": "FALSTAFF" 
       }, 
       { 
        "doc_count": 1076, 
        "key": "KING HENRY V" 
       }, 
       { 
        "doc_count": 1043, 
        "key": "BRUTUS" 
       }, 
       { 
        "doc_count": 928, 
        "key": "OTHELLO" 
       }, 
       { 
        "doc_count": 915, 
        "key": "MARK ANTONY" 
       }, 
       { 
        "doc_count": 909, 
        "key": "KING HENRY VI" 
       }, 
       { 
        "doc_count": 901, 
        "key": "DUKE VINCENTIO" 
       } 
      ], 
      "doc_count_error_upper_bound": 461, 
      "sum_other_doc_count": 94715 
     } 
    }, 
    "hits": { 
     "hits": [ 
      { 
       "_id": "14", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 15, 
        "line_number": "1.1.12", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Did lately meet in the intestine shock" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "19", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 20, 
        "line_number": "1.1.17", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "The edge of war, like an ill-sheathed knife," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "22", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 23, 
        "line_number": "1.1.20", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose soldier now, under whose blessed cross" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "24", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 25, 
        "line_number": "1.1.22", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Forthwith a power of English shall we levy;" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "25", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 26, 
        "line_number": "1.1.23", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "Whose arms were moulded in their mothers womb" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "26", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 27, 
        "line_number": "1.1.24", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "To chase these pagans in those holy fields" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "29", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 30, 
        "line_number": "1.1.27", 
        "play_name": "Henry IV", 
        "speaker": "KING HENRY IV", 
        "speech_number": 1, 
        "text_entry": "For our advantage on the bitter cross." 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "40", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 41, 
        "line_number": "1.1.38", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Whose worst was, that the noble Mortimer," 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "41", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 42, 
        "line_number": "1.1.39", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "Leading the men of Herefordshire to fight" 
       }, 
       "_type": "line" 
      }, 
      { 
       "_id": "44", 
       "_index": "shakespeare", 
       "_score": 1.0, 
       "_source": { 
        "line_id": 45, 
        "line_number": "1.1.42", 
        "play_name": "Henry IV", 
        "speaker": "WESTMORELAND", 
        "speech_number": 2, 
        "text_entry": "A thousand of his people butchered;" 
       }, 
       "_type": "line" 
      } 
     ], 
     "max_score": 1.0, 
     "total": 106228 
    }, 
    "timed_out": false, 
    "took": 48 
} 
+1

你介意添加一个答案,说明你是如何解决的吗?这将有助于将来遇到同样问题的其他人 :) –