ElasticSearch - 筛选器嵌套聚合

我在对聚合结果进行过滤时遇到了问题。我认为自己的方向是对的，但总感觉一直在原地打转。

下面是我的映射（mapping）：

PUT /my_index 
{ 
    "mappings": { 
    "reporting": { 
     "properties": { 
     "events": { 
      "type": "nested", 
      "properties": { 
      "name": { "type": "string", "index" : "not_analyzed" }, 
      "date": { "type": "date" } 
      } 
     } 
     } 
    } 
    } 
}

所以，我的文档是这样的：

{ 
    "events": [ 
    { "name": "INSTALL", "date": "2014-11-01" }, 
    { "name": "UNINSTALL", "date": "2014-11-03" }, 
    { "name": "INSTALL", "date": "2014-11-04" }, 
    ... 
    ] 
}

现在，我索引了一些数据，例如：

PUT /my_index/reporting/1 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-05" 
    } 
] 
} 

PUT /my_index/reporting/2 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-03" 
    } 
] 
} 

PUT /my_index/reporting/3 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-02" 
    } 
] 
} 

PUT /my_index/reporting/4 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-02" 
    }, 
    { 
     "name": "INSTALL", 
     "date": "2014-11-03" 
    } 
] 
} 

PUT /my_index/reporting/5 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-03" 
    }, 
    { 
     "name": "INSTALL", 
     "date": "2014-11-03" 
    } 
] 
} 

PUT /my_index/reporting/6 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-03" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-03" 
    }, 
    { 
     "name": "INSTALL", 
     "date": "2014-11-05" 
    } 
] 
} 

PUT /my_index/reporting/7 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-02" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-03" 
    }, 
    { 
     "name": "INSTALL", 
     "date": "2014-11-05" 
    } 
] 
} 

PUT /my_index/reporting/8 
{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-01" 
    } 
] 
}

我想得到在 2014-11-02 当天或之后安装、且此后没有卸载的人数（也就是说，UNINSTALL 发生在 2014-11-02 之前，或者根本没有 UNINSTALL 事件），并按 date_histogram 的方式对它们分组（即带有“日期”->“计数”数据的桶）。

我已经设法针对这些嵌套数据写出了过滤器，可以得到过滤后的结果，但一涉及直方图聚合，我就总是在原地打转。

这是我卡住的地方。

GET /my_index/reporting/_search 
{ 
    "query": { 
    "filtered": { 
     "query": { 
     "match_all": {} 
     }, 
     "filter": { 
     "bool": { 
      "must": [ 
      { 
       "nested": { 
       "path": "events", 
       "filter": { 
        "bool": { 
        "must": [ 
         { 
         "term": { 
          "name": "INSTALL" 
         } 
         }, 
         { 
         "range": { 
          "date": { 
          "gte": "2014-11-02" 
          } 
         } 
         } 
        ] 
        } 
       } 
       } 
      }, 
      { 
       "nested": { 
       "path": "events", 
       "filter": { 
        "bool": { 
        "should": [ 
         { 
         "bool": { 
          "must_not": [ 
          { 
           "term": { 
           "name": "UNINSTALL" 
           } 
          } 
          ] 
         } 
         }, 
         { 
         "bool": { 
          "must": [ 
          { 
           "term": { 
           "name": "UNINSTALL" 
           } 
          }, 
          { 
           "range": { 
           "date": { 
            "lt": "2014-11-02" 
           } 
           } 
          } 
          ] 
         } 
         } 
        ] 
        } 
       } 
       } 
      } 
      ] 
     } 
     } 
    } 
    }, 
    "aggregations": { 
    "filtered_result": { 
     "filter": { 
     "bool": { 
      "must": [ 
      { 
       "nested": { 
       "path": "events", 
       "filter": { 
        "bool": { 
        "must": [ 
         { 
         "term": { 
          "name": "INSTALL" 
         } 
         }, 
         { 
         "range": { 
          "date": { 
          "gte": "2014-11-02" 
          } 
         } 
         } 
        ] 
        } 
       } 
       } 
      }, 
      { 
       "nested": { 
       "path": "events", 
       "filter": { 
        "bool": { 
        "should": [ 
         { 
         "bool": { 
          "must_not": [ 
          { 
           "term": { 
           "name": "UNINSTALL" 
           } 
          } 
          ] 
         } 
         }, 
         { 
         "bool": { 
          "must": [ 
          { 
           "term": { 
           "name": "UNINSTALL" 
           } 
          }, 
          { 
           "range": { 
           "date": { 
            "lt": "2014-11-02" 
           } 
           } 
          } 
          ] 
         } 
         } 
        ] 
        } 
       } 
       } 
      } 
      ] 
     } 
     }, 
     "aggs": { 
     "result": { 
      "nested": { 
      "path": "events" 
      }, 
      "aggs": { 
      "NAME": { 
       "terms": { 
       "field": "events.date", 
       "format": "yyyy-MM-dd", 
       "order": { 
        "_term": "asc" 
       } 
       } 
      } 
      } 
     } 
     } 
    } 
    } 
}

而我得到的结果是这样的：

... omitted 4 documents that match filter criteria ... 
    "aggregations": { 
     "filtered_result": { 
     "doc_count": 4, <---- this is ok, I really have 4 docs that match criteria 
     "result": { 
      "doc_count": 12, <---- those 4 documents really have 12 events (together) 
      "NAME": { 
       "buckets": [ 
        { 
        "key": 1414800000000, 
        "key_as_string": "2014-11-01", 
        "doc_count": 2 
        }, 
        { 
        "key": 1414886400000, 
        "key_as_string": "2014-11-02", 
        "doc_count": 2 
        }, 
        { 
        "key": 1414972800000, 
        "key_as_string": "2014-11-03", 
        "doc_count": 6 
        }, 
        { 
        "key": 1415145600000, 
        "key_as_string": "2014-11-05", 
        "doc_count": 2 
        } 
       ] 
      } 
     } 
     } 
    }

而我希望得到的是类似这样的结果：

"buckets": [ 
{ 
    "key_as_string": "2014-11-02", 
    "doc_count": 0 
}, 
{ 
    "key_as_string": "2014-11-03", 
    "doc_count": 2 
}, 
{ 
    "key_as_string": "2014-11-04", 
    "doc_count": 0 
}, 
{ 
    "key_as_string": "2014-11-05", 
    "doc_count": 2 
} 
]

基本上，符合条件的 4 个文档应按照条件成立的日期分布：“2014-11-03” 有 2 个文档，“2014-11-05” 有 2 个文档（这 4 个文档在 2014-11-02 当天或之后有 INSTALL 事件，且此后没有 UNINSTALL 事件，即仍处于已安装状态）。

来源

2014-11-04 tomdzon

这是部分答案。

有一个主要问题：根据你的数据，实际上没有任何文档符合你的要求，所以我添加了一些：

curl -XPUT 'localhost:9200/my_index/reporting/9' -d '{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-03" 
    } 
] 
}' 

curl -XPUT 'localhost:9200/my_index/reporting/10' -d '{ 
    "events": [ 
    { 
     "name": "INSTALL", 
     "date": "2014-11-03" 
    }, 
    { 
     "name": "UNINSTALL", 
     "date": "2014-11-01" 
    } 
    ] 
}'

为了能够实现这一逻辑，我修改了映射，使事件也包含在父文档中——这样就可以搜索“没有任何 UNINSTALL 事件”的文档。因为在嵌套搜索中，每次只能看到单个事件，所以无法进行任何“整个 reporting 文档范围”的搜索。

curl -XPUT 'localhost:9200/my_index' -d '{ 
    "mappings": { 
    "reporting": { 
     "properties": { 
     "events": { 
      "type": "nested", "include_in_root": true, 
      "properties": { 
      "name": { "type": "string", "index" : "not_analyzed" }, 
      "date": { "type": "date" } 
      } 
     } 
     } 
    } 
    } 
}'

接下来是查询本身。看起来，使用 nested 过滤器时，不能直接写 “filter”，而必须先套一层 “query > filtered > filter”。

一般来说，编写较长的 Elasticsearch 查询时有一个技巧——记住除了 “must” 和 “must_not” 之外，还有 “and” 和 “or” 运算符——就是先把逻辑用代码的形式写出来。以你的情况为例：

has_one(event.name == 'INSTALL' && event.date >= '2014-11-02') 
&& has_none(event.name == 'UNINSTALL') 
&& has_none(event.name == 'UNINSTALL' && event.date >= '2014-11-02')

或者：

has_one(event.name == 'INSTALL' && event.date >= '2014-11-02') 
&& (has_none(event.name == 'UNINSTALL') 
    || has_only(event.name == 'UNINSTALL' && event.date >= '2014-11-02'))

除了最后的 has_only/has_none 之外，其余条件我都实现了。对于这一部分，你可能需要尝试使用子文档（parent/child）；那样至少可以在 bool 的 must_not 下使用 has_child 过滤器。

当前查询：

GET /my_index/reporting/_search 
{ 
    "query": { 
    "filtered": { 
     "query": { 
     "match_all": {} 
     }, 
     "filter": { 
     "and": { 
      "filters": [ 
      { 
       "or": { 
       "filters": [ 
        { 
        "bool": { 
         "must_not": [ 
         { 
          "term": { 
          "events.name": "UNINSTALL" 
          } 
         } 
         ] 
        } 
        }, 
        { 
        "nested": { 
         "path": "events", 
         "query": { 
         "filtered": { 
          "filter": { 
          "bool": { 
           "must": [ 
           { 
            "term": { 
            "name": "UNINSTALL" 
            } 
           }, 
           { 
            "range": { 
            "date": { 
             "lt": "2014-11-02" 
            } 
            } 
           } 
           ] 
          } 
          } 
         } 
         } 
        } 
        } 
       ] 
       } 
      }, 
      { 
       "nested": { 
       "path": "events", 
       "query": { 
        "filtered": { 
        "filter": { 
         "bool": { 
         "must": [ 
          { 
          "term": { 
           "name": "INSTALL" 
          } 
          }, 
          { 
          "range": { 
           "date": { 
           "gte": "2014-11-02" 
           } 
          } 
          } 
         ] 
         } 
        } 
        } 
       } 
       } 
      } 
      ] 
     } 
     } 
    } 
    }, 
    "aggregations": { 
    "filtered_result": { 
     "filter": { 
     "and": { 
      "filters": [ 
      { 
       "or": { 
       "filters": [ 
        { 
        "bool": { 
         "must_not": [ 
         { 
          "term": { 
          "events.name": "UNINSTALL" 
          } 
         } 
         ] 
        } 
        }, 
        { 
        "nested": { 
         "path": "events", 
         "query": { 
         "filtered": { 
          "filter": { 
          "bool": { 
           "must": [ 
           { 
            "term": { 
            "name": "UNINSTALL" 
            } 
           }, 
           { 
            "range": { 
            "date": { 
             "lt": "2014-11-02" 
            } 
            } 
           } 
           ] 
          } 
          } 
         } 
         } 
        } 
        } 
       ] 
       } 
      }, 
      { 
       "nested": { 
       "path": "events", 
       "query": { 
        "filtered": { 
        "filter": { 
         "bool": { 
         "must": [ 
          { 
          "term": { 
           "name": "INSTALL" 
          } 
          }, 
          { 
          "range": { 
           "date": { 
           "gte": "2014-11-02" 
           } 
          } 
          } 
         ] 
         } 
        } 
        } 
       } 
       } 
      } 
      ] 
     } 
     }, 
     "aggs": { 
     "result": { 
      "nested": { 
      "path": "events" 
      }, 
      "aggs": { 
      "NAME": { 
       "terms": { 
       "field": "date", 
       "format": "yyyy-MM-dd", 
       "order": { 
        "_term": "asc" 
       } 
       } 
      } 
      } 
     } 
     } 
    } 
    } 
}

来源

2015-09-09 10:12:57 whythecode

ElasticSearch - 筛选器嵌套聚合

回答

相关问题