Skip to content

Instantly share code, notes, and snippets.

@gwicke
Last active September 7, 2015 18:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gwicke/660fdd8ca8a552fc6449 to your computer and use it in GitHub Desktop.
Save gwicke/660fdd8ca8a552fc6449 to your computer and use it in GitHub Desktop.
var zlib = require('zlib');
var snappy = require('snappy');
var avro = require('node-avro-io');
var toArray = require('stream-to-array');
// Schema handed to the Avro data-file writer below. It describes an
// "EditEvent" record: a required `action` string, a nested `meta` record and
// a run of optional fields, each typed as a ["null", T] union.
var schema = (function () {
    // A field with the given name and type.
    function field(name, type) {
        return { name: name, type: type };
    }

    // A field that may be null or hold a value of the given primitive type.
    function optional(name, type) {
        return field(name, ["null", type]);
    }

    return {
        namespace: "org.wikimedia.mediawiki",
        type: "record",
        name: "EditEvent",
        fields: [
            field("action", "string"),
            // NOTE(review): the nested record is declared inline on the field
            // ("type": "record" plus "fields") rather than as a nested schema
            // object - confirm this is the form the Avro library expects.
            {
                name: "meta",
                type: "record",
                fields: [
                    field("wiki", "string"),
                    field("webHost", "string"),
                    field("recvFrom", "string"),
                    field("timestamp", "long"),
                    field("clientIp", "string"),
                    field("seqId", "long"),
                    field("uuid", "string"),
                    field("userAgent", "string")
                ]
            },
            optional("action_init_type", "string"),
            optional("action_init_mechanism", "string"),
            optional("action_ready_timing", "long"),
            optional("action_save_intent_timing", "long"),
            optional("action_save_attempt_timing", "long"),
            optional("action_save_success_timing", "long"),
            optional("action_save_failure_type", "string"),
            optional("action_save_failure_message", "string"),
            optional("action_save_failure_timing", "long"),
            optional("action_abort_type", "string"),
            optional("action_abort_mechanism", "string"),
            optional("action_abort_timing", "long"),
            optional("mediawiki_version", "string"),
            optional("page", "string"),
            optional("editing_session_id", "string"),
            optional("user", "string")
        ]
    };
}());
// Sample event used for the size comparison: only `action` and `meta` carry
// values; every other field is explicitly null.
var record = {
    action: 'saveAttempt',
    meta: {
        wiki: 'en.wikipedia.org',
        webHost: 'mediawiki-vagrant.dev',
        recvFrom: 'mediawiki-vagrant.dev',
        timestamp: 1433974402,
        clientIp: '127.0.0.1',
        seqId: 1,
        uuid: '05fcf26',
        userAgent: 'nonya'
    },

    // init / ready
    action_init_type: null,
    action_init_mechanism: null,
    action_ready_timing: null,

    // save
    action_save_intent_timing: null,
    action_save_attempt_timing: null,
    action_save_success_timing: null,
    action_save_failure_type: null,
    action_save_failure_message: null,
    action_save_failure_timing: null,

    // abort
    action_abort_type: null,
    action_abort_mechanism: null,
    action_abort_timing: null,

    // context
    mediawiki_version: null,
    page: null,
    editing_session_id: null,
    user: null
};
// JSON variant of the record: copy each top-level property whose value is not
// null, so null fields are left out of the object (and of its JSON) entirely.
// Only the top level is filtered; nested objects are copied by reference.
var jsonRecord = Object.keys(record).reduce(function (kept, name) {
    var value = record[name];
    if (value !== null) {
        kept[name] = value;
    }
    return kept;
}, {});
// Write the record through an Avro data-file writer using the "snappy" codec
// and report the total number of bytes the writer stream emits.
var writer = new avro.DataFile.Writer(schema, "snappy");
writer.append(record).end();
// Gather every chunk the writer produces, then measure the concatenation.
toArray(writer, function(err, arr) {
    if (err) {
        // Surface the actual stream error; otherwise the failure would show
        // up as an unrelated error from Buffer.concat on an unusable `arr`.
        throw err;
    }
    var dataBuffer = Buffer.concat(arr);
    console.log('avro+snappy', dataBuffer.length);
});
// Measure the JSON encoding of the null-stripped record: raw, snappy-compressed
// and deflate-compressed.
var json = JSON.stringify(jsonRecord);
// Byte length of the UTF-8 encoding, without allocating a throwaway buffer via
// the deprecated `new Buffer(string)` constructor.
console.log('json', Buffer.byteLength(json));
var buf = snappy.compressSync(json);
console.log('json+snappy', buf.length);
// zlib.deflate is asynchronous and delivers its result through the callback,
// so its return value is not assigned to anything.
zlib.deflate(json, function(err, res) {
    if (err) {
        // Without this check a failed deflate would crash on `res.length`.
        throw err;
    }
    console.log('json+deflate', res.length);
});
// Output
// json 221
// json+snappy 198
// json+deflate 160
// avro+snappy 1504 (1327 schema, ~160 data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment