4
该机器是我的 MongoDB 集群中的一个分片。该集群共有三个分片:machine1 上的 shard1 和 machine2 上的 shard2,这两台机器都是 8G 内存和 800G 磁盘;machine3 上运行 configdb、mongos 和 shard3,这台机器是 16G 内存和 400G 磁盘。MongoDB 页面错误多、锁定比例高,但内存足够?
现在的问题是:
mongostat 在 machine3 上显示正常,但在 machine1 和 machine2 上,页面错误(faults)和 locked db 百分比始终居高不下。
我只列出 machine1 的一些情况。top 命令的结果:
[]$top
Cpu(s): 0.2%us, 0.2%sy, 0.0%ni, 99.2%id, 0.3%wa, 0.0%hi, 0.0%si, 0.2%st
Mem: 7633792k total, 7302168k used, 331624k free, 84456k buffers
Swap: 0k total, 0k used, 0k free, 6209852k cached
PID PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
7562 20 0 200g 1.0g 702m S 0.3 14.3 4:36.50 mongod
这里是mongostat:
insert query update delete getmore command flushes mapped vsize res non-mapped faults locked db idx miss % qr|qw ar|aw netIn netOut conn time
0 2 12 0 0 13 0 100g 201g 911m 101g 11 amazon:38.9% 0 3|0 1|0 4k 4k 14 01:45:35
0 0 3 0 0 7 1 100g 201g 912m 101g 28 amazon:1.2% 0 0|0 0|0 1k 3k 14 01:45:36
0 2 14 0 0 15 0 100g 201g 912m 101g 93 amazon:4.6% 0 0|0 0|0 7k 4k 14 01:45:37
0 0 0 0 0 1 0 100g 201g 911m 101g 141 amazon:0.2% 0 0|0 0|0 62b 2k 14 01:45:38
0 6 24 0 0 25 0 100g 201g 913m 101g 123 amazon:6.4% 0 0|0 0|0 8k 6k 14 01:45:39
0 1 9 0 0 10 0 100g 201g 912m 101g 33 amazon:4.2% 0 0|0 0|0 3k 3k 14 01:45:40
0 12 59 0 0 58 0 100g 201g 914m 101g 108 amazon:30.0% 0 1|0 0|1 24k 12k 14 01:45:41
0 20 93 0 0 96 0 100g 201g 911m 101g 114 amazon:36.1% 0 0|0 0|0 33k 17k 14 01:45:42
0 19 84 0 0 86 0 100g 201g 913m 101g 103 amazon:43.9% 0 0|0 1|0 28k 16k 14 01:45:43
0 9 29 0 0 26 0 100g 201g 914m 101g 37 amazon:5.5% 0 5|0 0|1 11k 6k 14 01:45:44
这里是服务器状态:
> db.serverStatus()
{
"host" : "XX-XX-XX-XX:25018",
"version" : "2.2.3",
"process" : "mongod",
"pid" : 7562,
"uptime" : 1410,
"uptimeMillis" : NumberLong(1410211),
"uptimeEstimate" : 1390,
"localTime" : ISODate("2013-03-22T01:49:01.459Z"),
"locks" : {
"." : {
"timeLockedMicros" : {
"R" : NumberLong(563437),
"W" : NumberLong(22798453)
},
"timeAcquiringMicros" : {
"R" : NumberLong(303677814),
"W" : NumberLong(59991149)
}
},
"admin" : {
"timeLockedMicros" : {
},
"timeAcquiringMicros" : {
}
},
"local" : {
"timeLockedMicros" : {
"r" : NumberLong(6613),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(1937433),
"w" : NumberLong(0)
}
},
"amazon" : {
"timeLockedMicros" : {
"r" : NumberLong(203845605),
"w" : NumberLong(651848025)
},
"timeAcquiringMicros" : {
"r" : NumberLong(621538184),
"w" : NumberLong(1525509360)
}
},
"test" : {
"timeLockedMicros" : {
"r" : NumberLong(5143),
"w" : NumberLong(999532)
},
"timeAcquiringMicros" : {
"r" : NumberLong(157712),
"w" : NumberLong(60)
}
}
},
"globalLock" : {
"totalTime" : NumberLong(1410211000),
"lockTime" : NumberLong(22798453),
"currentQueue" : {
"total" : 0,
"readers" : 0,
"writers" : 0
},
"activeClients" : {
"total" : 0,
"readers" : 0,
"writers" : 0
}
},
"mem" : {
"bits" : 64,
"resident" : 945,
"virtual" : 205577,
"supported" : true,
"mapped" : 102383,
"mappedWithJournal" : 204766
},
"connections" : {
"current" : 14,
"available" : 805
},
"extra_info" : {
"note" : "fields vary by platform",
"heap_usage_bytes" : 190782680,
"page_faults" : 68002
},
"indexCounters" : {
"btree" : {
"accesses" : 274412,
"hits" : 274412,
"misses" : 0,
"resets" : 0,
"missRatio" : 0
}
},
"backgroundFlushing" : {
"flushes" : 23,
"total_ms" : 89781,
"average_ms" : 3903.521739130435,
"last_ms" : 929,
"last_finished" : ISODate("2013-03-22T01:48:32.243Z")
},
"cursors" : {
"totalOpen" : 0,
"clientCursors_size" : 0,
"timedOut" : 0
},
"network" : {
"bytesIn" : 11325630,
"bytesOut" : 181775584,
"numRequests" : 67850
},
"opcounters" : {
"insert" : 157,
"query" : 6898,
"update" : 29954,
"delete" : 0,
"getmore" : 0,
"command" : 30902
},
"asserts" : {
"regular" : 0,
"warning" : 0,
"msg" : 0,
"user" : 1,
"rollovers" : 0
},
"writeBacksQueued" : false,
"dur" : {
"commits" : 27,
"journaledMB" : 0.36864,
"writeToDataFilesMB" : 1.241313,
"compression" : 0.2963027264769924,
"commitsInWriteLock" : 0,
"earlyCommits" : 0,
"timeMs" : {
"dt" : 3269,
"prepLogBuffer" : 0,
"writeToJournal" : 442,
"writeToDataFiles" : 4,
"remapPrivateView" : 23
}
},
"recordStats" : {
"accessesNotInMemory" : 32752,
"pageFaultExceptionsThrown" : 1656,
"amazon" : {
"accessesNotInMemory" : 32752,
"pageFaultExceptionsThrown" : 1656
},
"local" : {
"accessesNotInMemory" : 0,
"pageFaultExceptionsThrown" : 0
},
"test" : {
"accessesNotInMemory" : 0,
"pageFaultExceptionsThrown" : 0
}
},
"ok" : 1
}
有人能给我一些建议吗?非常感谢。
看起来机器内存不足。你可以按常驻内存(RES)对进程排序吗?Mongo 只占了 1GB,其他的占了 6GB。 – 2013-03-22 02:50:34
好的,按 res 排序。后面依次是 32m | python,29m | mongostat,29m | mongostat,29m | mongostat,29m | mongostat,25m | python,15m | python,6468 | mongo,5560 | httpd,5560 | httpd,5560 | httpd,5560 | httpd,5560 | httpd。就这样。 – 2013-03-22 03:56:01
你每秒有多少次写入?你的更新率似乎相对较高。这些更新是否会显著增加文档大小?从这些数值来看,我猜你使用的是机械硬盘(旋转磁盘)?更新可能会迫使被更新的文档在数据文件中移动,这可能导致较高的锁定比例。如果您使用的版本 < 2.6,为该数据库的集合[设置 'usePowerOf2Sizes'](http://docs.mongodb.org/manual/reference/command/collMod/#collmod) 可能会有所帮助。 – 2014-08-03 14:10:41