Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A monster of a MongoDB record for server monitoring. Emitted once per minute per server in the cluster, aggregated to a "master control cluster" or MCC. Lack of communication is a sign of failure which the MCC reacts to by starting a replacement VM and killing the old one, automatically handling load balancer reconfigurations. VMs are ephemeral …
// Per-host collection, once per minute.
// Keys are shortened to the first character(s) that keep them unique within their object.
// https://github.com/boxedice/sd-agent
{
// DONE
// user, nice, system, iowait, irq, soft, steal, guest, idle
'cpu': [
{ 'u': 0.0, 'n': 0.0, 's': 0.0, 'io': 0.0, 'irq': 0.0, 'o': 0.0, 't': 0.0, 'g': 0.0, 'i': 0.0 },
{ 'u': 0.0, 'n': 0.0, 's': 0.0, 'io': 0.0, 'irq': 0.0, 'o': 0.0, 't': 0.0, 'g': 0.0, 'i': 0.0 },
{ 'u': 0.0, 'n': 0.0, 's': 0.0, 'io': 0.0, 'irq': 0.0, 'o': 0.0, 't': 0.0, 'g': 0.0, 'i': 0.0 },
{ 'u': 0.0, 'n': 0.0, 's': 0.0, 'io': 0.0, 'irq': 0.0, 'o': 0.0, 't': 0.0, 'g': 0.0, 'i': 0.0 }
],
// partition: label, used, available, total, ratio
'disk': { '/dev/sda1': { 'l': '/', 'u': 2222540, 'a': 7084540, 't': 9805144, 'r': 0.24 } },
// rrqm, wrqm, r, w, rkB, wkB, avgrqsz, avgqusz, await, r_await, w_await, svctm, util },
'io': {
'xvdap1': { 'rq': 0.0, 'wq': 0.0, 'r': 0.0, 'w': 0.0, 'rs': 0.0, 'ws': 0.0, 'arqs': 0.0, 'aqs': 0.0, 'a': 0.0, 'ra': 0.0, 'wa': 0.0, 's': 0.0, 'u': 0.0 },
'xvdap2': { 'rq': 0.0, 'wq': 0.0, 'r': 0.0, 'w': 0.0, 'rs': 0.0, 'ws': 0.0, 'arqs': 0.0, 'aqs': 0.0, 'a': 0.0, 'ra': 0.0, 'wa': 0.0, 's': 0.0, 'u': 0.0 }
},
'load': [ 0.0, 0.0, 0.0 ], // DONE
// ram: ( physical: ( free, used, total, ratio ), swap ( free, used, total, ratio ), cache
'ram': { 'p': { 'f': 0, 'u': 0, 't': 0, 'r': 0.0 }, 's': { 'f': 0, 'u': 0, 't': 0, 'r': 0.0 }, 'c': 0 },
// if running a mongo server
'mongo': {
'l': { 'r': 0.0, 'q': { 'r': 0, 'w': 0, 't': 0 } }, // global_lock: ratio, current queue ( readers, writers, total )
'm': { 'r': 0, 'v': 0, 'm': 0 }, // memory: resident, virtual, mapped
'c': { 'c': 0, 'a': 0 }, // connections: current, available
'b': { 's': 0, 'l': 0, 'a': 0 }, // background flushing: secondsSinceLastFlush, lastFlushLength, flushLengthAverage
// May not be present: extraInfo
'h': 0, // heap
'f': 0, // faults
// per-second metrics
'i': { 'a': 0, 'h': 0, 'm': 0, 'r': 0 }, // index counters: accesses, hits, misses, hit/miss ratio
'o': { 'i': 0, 'q': 0, 'u': 0, 'd': 0, 'g': 0, 'c': 0 }, // op counters: inserts, queries, updates, deletes, getMores, commands
'a': { 'r': 0, 'w': 0, 'm': 0, 'u': 0, 'r': 0 }, // asserts: regular, warning, message, user, rollover
'u': { 'o': 0 }, // cUrsors: total open
'r': { 'n': '', 'm': true, 's': false, 'a': false, 'l': '', 'i': '', // replecation: name, master, secondary, arbiter, mystate, myID
'e': { '_id': { 'n': '', 's': '', 'o': 0, 'h': 0, 'e': null }, } }, // mEmbers: name, state, opTimeDate, lastHeartBeat, last errmsg
// (optional) db statistics
's': {
// collections, objects, avgObjSize, dataSize, storageSize, numExtents, indexes, indexSize, fileSize, namespaces, nsSizeMB, OK
'dbname1': { 'c': 21, 'o': 6438, 'aos': 758.0925753339546, 'ds': 4880600, 'ss': 24449024, 'ne': 36, 'i': 23, 'is': 433328, 'fs': 50331648, 'n': 2, 'nss': 4, 'ok': 1 },
'dbname2': { 'c': 21, 'o': 6438, 'aos': 758.0925753339546, 'ds': 4880600, 'ss': 24449024, 'ne': 36, 'i': 23, 'is': 433328, 'fs': 50331648, 'n': 2, 'nss': 4, 'ok': 1 },
'dbname3': { 'c': 21, 'o': 6438, 'aos': 758.0925753339546, 'ds': 4880600, 'ss': 24449024, 'ne': 36, 'i': 23, 'is': 433328, 'fs': 50331648, 'n': 2, 'nss': 4, 'ok': 1 },
'dbname4': { 'c': 21, 'o': 6438, 'aos': 758.0925753339546, 'ds': 4880600, 'ss': 24449024, 'ne': 36, 'i': 23, 'is': 433328, 'fs': 50331648, 'n': 2, 'nss': 4, 'ok': 1 },
'dbname5': { 'c': 21, 'o': 6438, 'aos': 758.0925753339546, 'ds': 4880600, 'ss': 24449024, 'ne': 36, 'i': 23, 'is': 433328, 'fs': 50331648, 'n': 2, 'nss': 4, 'ok': 1 },
},
},
'mysql': { }, // MySQL
// Network
'net': {
// recieve, transmit (bytes)
'eth0': { 'r': 0, 't': 0 },
'eth1': { 'r': 0, 't': 0 }
},
// Nginx
'nginx': { 'c': 0, 'r': 0 }, // connections, requests per second
// Process List
'procs': [
// USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
[ null, null, null, null, null, null, null, null, null, null, null ],
{ 'u': '', 'p': 0, 'c': 0.0, 'm': 0.0, 'v': 0, 'r': 0, 't': null, 's': '', 'l': 2009, 'i': '0:01', 'l': '' }
],
// uWSGI
'uwsgi': {},
// Ping (https://github.com/Kami/sd-agent-plugin-latency-monitor)
'ping': {
'hostname': { 's': 5, 'r': 5, 'mn': 1.1, 'mx': 5.3, 'a': 3.16, 'd': 2.0 }, // sent, recieved, minimum, maximum, average, stddev
},
// Proof of Life / Health (ping + TCP connect, logging success vs. time)
// http://plugins.serverdensity.com/health-check/
'health': {},
// PostgreSQL
'postgres': {},
// PHP-FPM
'php': {},
// Memcached (http://plugins.serverdensity.com/memcached/)
'memcache': {}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.