2017-02-21 101 views
2

我一直在使用 MongoDB 3.2.9 对线上数据做一些调查,主要目的是找出文档中缺少某些数据的记录的细节。但我运行的查询在 Robomongo 和 Compass 中都会超时。

我有一个包含超过300万条记录的集合(foo)。我在寻找所有不具有 barId 的记录,这是我向 MongoDB 发出的查询:

db.foo.find({barId:{$exists:true}}).explain(true) 

下面是在 mongo shell 中得到的执行计划(在 Robomongo 或 Compass 中会超时):

MongoDB Enterprise > db.foo.find({barId:{$exists:true}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "barId" : { 
     "$exists" : true 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "barId" : { 
      "$exists" : true 
     } 
     }, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, MaxKey]" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 154716, 
    "totalKeysExamined" : 3361040, 
    "totalDocsExamined" : 3361040, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "barId" : { 
      "$exists" : true 
     } 
     }, 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 152060, 
     "works" : 3361041, 
     "advanced" : 2, 
     "needTime" : 3361038, 
     "needYield" : 0, 
     "saveState" : 27619, 
     "restoreState" : 27619, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 3361040, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 3361040, 
     "executionTimeMillisEstimate" : 1260, 
     "works" : 3361041, 
     "advanced" : 3361040, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 27619, 
     "restoreState" : 27619, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, MaxKey]" 
      ] 
     }, 
     "keysExamined" : 3361040, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

看起来它使用了我的 barId_1 索引,但同时它扫描了全部 300 多万条记录,却只返回 2 条。

我又运行了一个类似的查询,但这次不是检查字段是否存在,而是查找大于 "0" 的 ID(所有 ID 都满足这一条件):

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "barId" : { 
     "$gt" : "0" 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "(\"0\", {})" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 54, 
    "totalKeysExamined" : 2, 
    "totalDocsExamined" : 2, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 3, 
     "advanced" : 2, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 2, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 3, 
     "advanced" : 2, 
     "needTime" : 0, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "(\"1\", {})" 
      ] 
     }, 
     "keysExamined" : 2, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

这次同样对 barId_1 做了索引扫描,但只扫描了 2 条记录就返回了 2 条。

为了完整起见,这里是2条记录,其他300万条在大小和组成上非常相似。

MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}) 
{ 
    "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df777", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
} 
{ 
    "_id" : "00002f5d-ee4a-4996-bb27-b54ea84df888", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z" 
} 

当然,我在 Google 上搜索了一圈,发现过去 $exists 子句与索引一起使用时确实存在问题,但我读过的很多帖子都说这个问题已经修复了。是这样吗?另外,我发现在查找字段是否存在时,可以用下面的变通写法代替 $exists 子句,从而强制有效地使用索引。

MongoDB Enterprise > db.foo.find({barId:{$ne:null}}).explain(true) 
{ 
    "queryPlanner" : { 
    "plannerVersion" : 1, 
    "namespace" : "myDatabase01.foo", 
    "indexFilterSet" : false, 
    "parsedQuery" : { 
     "$not" : { 
     "barId" : { 
      "$eq" : null 
     } 
     } 
    }, 
    "winningPlan" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "$not" : { 
      "barId" : { 
      "$eq" : null 
      } 
     } 
     }, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, null)", 
      "(null, MaxKey]" 
      ] 
     } 
     } 
    }, 
    "rejectedPlans" : [ ] 
    }, 
    "executionStats" : { 
    "executionSuccess" : true, 
    "nReturned" : 2, 
    "executionTimeMillis" : 57, 
    "totalKeysExamined" : 3, 
    "totalDocsExamined" : 2, 
    "executionStages" : { 
     "stage" : "FETCH", 
     "filter" : { 
     "$not" : { 
      "barId" : { 
      "$eq" : null 
      } 
     } 
     }, 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 4, 
     "advanced" : 2, 
     "needTime" : 1, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "docsExamined" : 2, 
     "alreadyHasObj" : 0, 
     "inputStage" : { 
     "stage" : "IXSCAN", 
     "nReturned" : 2, 
     "executionTimeMillisEstimate" : 10, 
     "works" : 4, 
     "advanced" : 2, 
     "needTime" : 1, 
     "needYield" : 0, 
     "saveState" : 0, 
     "restoreState" : 0, 
     "isEOF" : 1, 
     "invalidates" : 0, 
     "keyPattern" : { 
      "barId" : 1 
     }, 
     "indexName" : "barId_1", 
     "isMultiKey" : false, 
     "isUnique" : false, 
     "isSparse" : false, 
     "isPartial" : false, 
     "indexVersion" : 1, 
     "direction" : "forward", 
     "indexBounds" : { 
      "barId" : [ 
      "[MinKey, null)", 
      "(null, MaxKey]" 
      ] 
     }, 
     "keysExamined" : 3, 
     "dupsTested" : 0, 
     "dupsDropped" : 0, 
     "seenInvalidated" : 0 
     } 
    }, 
    "allPlansExecution" : [ ] 
    }, 
    "serverInfo" : { 
    "host" : "myLinuxMachine", 
    "port" : 8080, 
    "version" : "3.2.9", 
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c" 
    }, 
    "ok" : 1 
} 

这样是可行的:只扫描了 2 个文档,也只返回了 2 个文档。

因此,我的问题是:我应该在查询中使用 $exists 吗?它适合在线上生产应用中使用吗?如果答案是否定的,那么 $exists 子句当初为什么要存在?

当然,也有可能是这个 MongoDB 安装本身有问题,或者是索引出了毛病。欢迎任何指点,不过目前我会继续使用 $ne: null 这个变通办法。

回答

2

您应该在 barId 字段上使用部分索引(partial index,首选)或稀疏索引(sparse index):

db.foo.createIndex(
    { barId: 1 }, 
    { partialFilterExpression: { barId: { $exists: true } } } 
) 
+0

谢谢,这个方法非常好用。按建议添加索引后,执行 barId: {$exists: true} 查询所花的时间缩短到了原来的十分之一左右。我只是好奇这两种索引为什么会有差异。为什么不把所有索引都这样创建呢? – Damo