我正在尝试使用PyMongo编写一个Python脚本,用它查询MongoDB,对数据库中可能存在的n个对象做精确匹配(也就是在多个文档字段上进行MongoDB精确匹配)。目前,我的查询是这样写的:
db.entries.find({'$or': [<list-of-objects>]})
其中对象列表看起来是这样的:
[{'email': '[email protected]', 'zip': '11111'}, {'email': '[email protected]', 'zip': '11112'}, ...]
当列表中只有10个左右的条目时,使用$or效果很好。我现在用100个条目进行测试,查询要花很长时间才能返回。我曾考虑过使用多个$in过滤器,但我不确定这是否是最佳选择。
我确定有更好的方法来处理这个问题,但我对Mongo相当陌生。
编辑:.explain()的输出如下:
{
"executionStats": {
"executionTimeMillis": 228734,
"nReturned": 2,
"totalKeysExamined": 0,
"allPlansExecution": [],
"executionSuccess": true,
"executionStages": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"inputStage": {
"needYield": 0,
"direction": "forward",
"saveState": 43556,
"restoreState": 43556,
"isEOF": 1,
"docsExamined": 5453000,
"nReturned": 2,
"needTime": 5452999,
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"executionTimeMillisEstimate": 208083,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "COLLSCAN"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 211503,
"transformBy": {
"_id": false
},
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "PROJECTION"
},
"nReturned": 2,
"needTime": 5452999,
"executionTimeMillisEstimate": 213671,
"invalidates": 0,
"works": 5453002,
"advanced": 2,
"stage": "SUBPLAN"
},
"totalDocsExamined": 5453000
},
"queryPlanner": {
"parsedQuery": {
"$or": [{
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"rejectedPlans": [],
"namespace": "db.entries",
"winningPlan": {
"inputStage": {
"transformBy": {
"_id": false
},
"inputStage": {
"filter": {
"$or": [{
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11111"
}
}]
}, {
"$and": [{
"email": {
"$eq": "[email protected]"
}
}, {
"zipcode": {
"$eq": "11112"
}
}]
}]
},
"direction": "forward",
"stage": "COLLSCAN"
},
"stage": "PROJECTION"
},
"stage": "SUBPLAN"
},
"indexFilterSet": false,
"plannerVersion": 1
},
"ok": 1.0,
"serverInfo": {
"host": "somehost",
"version": "3.4.6",
"port": 27017,
"gitVersion": "c55eb86ef46ee7aede3b1e2a5d184a7df4bfb5b5"
}
}
请添加'.explain()'的输出 –
@MarkusWMahlberg 请看原帖(OP) – xtheking
这个查询效率比较低:为了最终得到2个文档,你检查了5453000个文档。为什么不这样做:1. 在任意一个高基数的字段上创建索引,它可以是邮政编码或电子邮件。2. 使用聚合管道,先按你创建了索引的字段筛选文档,这样就能借助新索引过滤掉大量文档。希望有所帮助。 – Euclides