2015-02-23 139 views
2

我遇到了一个关于 ElasticSearch 中嵌套聚合和过滤器的问题:不加过滤器时,返回的是全局范围内的总和;加上过滤器后,嵌套的 doc_count 是正确的,但总和始终为 0。下面是我尝试运行的查询:

{ 
    "query": { 
     "nested": { 
      "path": "skills.tree", 
      "query": { 
      "bool" : { 
       "must" : [ 
       {"match": {"leaf0": "Management"}}, 
       {"match": {"leaf1": "Financial"}} 
       ] 
      } 
      } 
     } 
     }, 
    "aggs": { 
     "by_org": { 
       "terms": { 
       "field": "org" 
       }, 
       "aggs": { 
       "sum_weight0-filtered": { 
        "filter": { 
        "nested": { 
         "path": "skills.tree", 
         "query": { 
         "bool" : { 
          "must" : [ 
          {"match": {"leaf0": "Management"}}, 
          {"match": {"leaf1": "Financial"}} 
          ] 
         } 
         } 
        } 
        }, 
        "aggs":{ 
        "sum0":{ 
         "sum": { 
         "field": "skills.tree.weight0" 
         } 
        }, 
        "sum1":{ 
         "sum": { 
         "field": "skills.tree.weight1" 
         } 
        } 
        } 
       } 
       } 
     } 
    } 
} 

和下面是一个示例输出:

{ 
    "took": 978, 
    "timed_out": false, 
    "_shards": { 
     "total": 50, 
     "successful": 50, 
     "failed": 0 
    }, 
    "hits": { 
     "total": 11337, 
     "max_score": 0, 
     "hits": [] 
    }, 
    "aggregations": { 
     "by_org": { 
     "buckets": [ 
      { 
       "key": "Aetna", 
       "doc_count": 1888, 
       "sum_weight0-filtered": { 
        "doc_count": 1888, 
        "sum0": { 
        "value": 0 
        }, 
        "sum1": { 
        "value": 0 
        } 
       } 
      }, 
      { 
       "key": "AECOM", 
       "doc_count": 1085, 
       "sum_weight0-filtered": { 
        "doc_count": 1085, 
        "sum0": { 
        "value": 0 
        }, 
        "sum1": { 
        "value": 0 
        } 
       } 
      } 
.... 

下面是映射(mapping)的一部分:

'skills'  => array(
           'properties' => array(
            'tree' => array(
             'type' => 'nested', 
             'properties' => array(
              'leaf0' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf0"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'leaf1' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf1"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'leaf2' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf2"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'leaf3' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf3"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'leaf4' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf4"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'leaf5' => array(
               "type"  => "multi_field", 
               "fields" => array(
                "leaf5"=> array(
                 "type" => "string", 
                 "index" => "not_analyzed" 
                ), 
                "search"  => array(
                 "type" => "string", 
                 "index" => "analyzed" 
                ) 
               ) 
              ), 
              'weight1' => array(
               'type'  => 'integer', 
              ), 
              'weight2' => array(
               'type'  => 'integer', 
              ), 
              'weight3' => array(
               'type'  => 'integer', 
              ), 
              'weight4' => array(
               'type'  => 'integer', 
              ), 
              'weight5' => array(
               'type'  => 'integer', 
              ) 
             ) 
            ) 

问题在于 sum0 和 sum1 都返回 0,尽管数据中确实存在这些数值(在更高的范围内,即不加过滤器时,求和是正常的)。我在这里做错了什么?

回答

1

您应用的嵌套过滤器只作用于筛选条件,并不会改变后续聚合查找值的位置。也就是说,求和所用的值位于嵌套对象中,而不在父文档中,因此结果为 0。如果使用嵌套聚合(nested aggregation)让 ES 在嵌套对象上进行聚合,就应该可以正常工作:

{ 
    "query": { 
    "nested": { 
     "path": "skills.tree", 
     "query": { 
     "bool": { 
      "must": [ 
      { 
       "match": { 
       "leaf0": "Management" 
       } 
      }, 
      { 
       "match": { 
       "leaf1": "Financial" 
       } 
      } 
      ] 
     } 
     } 
    } 
    }, 
    "aggs": { 
    "by_org": { 
     "terms": { 
     "field": "org" 
     }, 
     "aggs": { 
     "sum_weight0-filtered": { 
      "filter": { 
      "nested": { 
       "path": "skills.tree", 
       "query": { 
       "bool": { 
        "must": [ 
        { 
         "match": { 
         "leaf0": "Management" 
         } 
        }, 
        { 
         "match": { 
         "leaf1": "Financial" 
         } 
        } 
        ] 
       } 
       } 
      } 
      }, 
      "aggs": { 
      "nestedAgg": { 
       "nested": { 
       "path": "skills.tree" 
       }, 
       "aggs": { 
       "sum0": { 
        "sum": { 
        "field": "skills.tree.weight0" 
        } 
       }, 
       "sum1": { 
        "sum": { 
        "field": "skills.tree.weight1" 
        } 
       } 
       } 
      } 
      } 
     } 
     } 
    } 
    } 
} 
+0

谢谢,不过首先你的查询有语法错误:nestedAgg 下面应该多一层名为 "nested" 的层级,再把下面的所有内容包进去。即便如此,它仍然无法正常工作:它确实对所有数值求了和,但是在全局范围内求和,忽略了 "field": "org" 的分桶范围,而不是在每个 org 内部分别求和——这正是我最初的问题。 – Marcin 2015-02-24 11:58:52

+0

请现在再试试,我已经做了修改。 – 2015-02-24 13:37:24

+0

还是不对,聚合基本上是针对整个索引运行的,而不是针对各个桶: `"aggregations": { "by_org": { "buckets": [ { "key": "本公司", "doc_count": 3171, "sum_weight0-filtered": { "doc_count": 3171, "nestedAgg": { "doc_count": 60117, "sum0": { "value": 343885 }, "sum1": { "value": 93162 } }` – Marcin 2015-02-24 16:59:33

1

问题可能只是出在您访问嵌套字段的方式上。具体而言,根据您的映射定义,这些 match 语句必须指向 leaf0 和 leaf1 的 search 子字段,因为它们才是真正经过分析(analyzed)的子字段。考虑到这一点,请尝试以下查询:

{ 
    "query": { 
     "nested": { 
      "path": "skills.tree", 
      "query": { 
      "bool" : { 
       "must" : [ 
       {"match": {"tree.leaf0.search": "Management"}}, 
       {"match": {"tree.leaf1.search": "Financial"}} 
       ] 
      } 
      } 
     } 
     }, 
    "aggs": { 
     "by_org": { 
       "terms": { 
       "field": "org" 
       }, 
       "aggs": { 
       "sum_weight0-filtered": { 
        "filter": { 
        "nested": { 
         "path": "skills.tree", 
         "query": { 
         "bool" : { 
          "must" : [ 
          {"match": {"tree.leaf0.search": "Management"}}, 
          {"match": {"tree.leaf1.search": "Financial"}} 
          ] 
         } 
         } 
        } 
        }, 
        "aggs":{ 
        "tree" : { 
         "nested" : {"path" : "skills.tree"}, 
         "aggs" : { 
         "sum0" : { 
          "sum": { 
          "field": "tree.weight0" 
          } 
         }, 
         "sum1": { 
          "sum": { 
          "field": "tree.weight1" 
          } 
         } 
         } 
        } 
        } 
       } 
       } 
      } 
    } 
} 

这个查询在我的一个很小的人造测试数据集上是可行的。值得一提的是,我是针对整个索引运行查询的,而不是针对特定的文档类型(因为在您最初发布的查询中,嵌套路径似乎是“完全”限定的)。

+0

谢谢,但它和另一个答案存在同样的问题:tree 聚合不遵守过滤器,是针对整个索引运行的,而不是只在桶内运行 – Marcin 2015-02-24 17:03:10

+0

这是一张截图,从中可以看出我所说的“针对整个索引运行”是什么意思 - http://postimg.org/image/4006mqtu9/ ,tree 下的 doc_count 是不正确的 – Marcin 2015-02-24 17:05:49