Skip to content

Instantly share code, notes, and snippets.

@mtth
Last active April 15, 2016 14:35
Show Gist options
  • Save mtth/fb87030c8cc142730d15 to your computer and use it in GitHub Desktop.
Save mtth/fb87030c8cc142730d15 to your computer and use it in GitHub Desktop.
Avro meta type

An Avro schema to describe Avro schemas.

protocol Meta {
// This protocol declares no messages; it only holds the named types below.
//
// Single-symbol enums: each one is the type of the `type` field of the
// matching record branch further down (ArrayType -> Array, EnumType -> Enum,
// ...), so that field can only ever hold its one keyword.
enum ArrayType { array }
enum EnumType { enum }
enum FixedType { fixed }
enum MapType { map }
// Names of the primitive types; used as the first branch of the `value` union.
enum PrimitiveType { boolean, bytes, double, float, int, long, null, string }
enum RecordType { record }
/**
* Meta type to represent any type.
*
* The logical type annotation will "unwrap" this type's `value` union field
* to match Avro schema definitions.
*
* It currently only represents canonical schemas, but it would be
* straightforward to add more information (e.g. field order or defaults).
*
*/
@logicalType("meta")
record Meta {
// One branch per shape a schema can take. `Meta` is referenced recursively
// (array items, map values, record field types, union members).
// NOTE(review): Avro's binary encoding identifies a union branch by its
// position, so reordering these branches would presumably invalidate
// previously encoded data -- confirm against the Avro specification.
union {
// A primitive type, stored as an enum symbol rather than as a string.
PrimitiveType,
// {"type": "array", "items": ...}
record Array {
ArrayType type;
Meta items;
},
// {"name": ..., "type": "enum", "symbols": [...]}
record Enum {
string name;
EnumType type;
array<string> symbols;
},
// {"name": ..., "type": "fixed", "size": ...}
record Fixed {
string name;
FixedType type;
int size;
},
// {"type": "map", "values": ...}
record Map {
MapType type;
Meta values;
},
// {"name": ..., "type": "record", "fields": [{"name": ..., "type": ...}]}
record Record {
string name;
RecordType type;
array<record Field { string name; Meta type; }> fields;
},
// A plain string that is not a primitive name: a reference to a type by name.
string, // Reference.
// A list of schemas: a union.
array<Meta> // Union.
} value;
}
}
/* jshint node: true */
'use strict';
/**
* Script to compress/decompress canonical schemas.
*
* Usage:
*
* node meta.js <schema.avsc # Compression
* node meta.js <schema.bin # Decompression
*
* This typically achieves compression ratios of 50%-90%.
*
*/
var avro = require('avsc'),
util = require('util');
avro.assemble('./meta.avdl', function (err, attrs) {
  // Entry point: assemble the IDL, build the meta type from it, then either
  // compress or decompress whatever arrives on stdin.
  if (err) {
    console.trace(err.message);
    // Report the failure through the exit status as well, so that shell
    // pipelines (e.g. `node meta.js <a.avsc >a.bin`) can detect it instead of
    // silently producing an empty output file.
    process.exitCode = 1;
    return;
  }

  // Generate the type from the IDL.
  var protocol = avro.parse(attrs, {logicalTypes: {meta: MetaType}});
  var metaType = protocol.getType('Meta');
  // The meta type needs the primitive names to tell them apart from
  // references when encoding.
  metaType.setSymbols(protocol.getType('PrimitiveType').getSymbols());

  // Process input: buffer all of stdin, then decide which way to convert.
  var bufs = [];
  process.stdin
    .on('data', function (buf) { bufs.push(buf); })
    .on('end', function () {
      var buf = Buffer.concat(bufs);
      // Named `schema` (not `attrs`) to avoid shadowing the assembled
      // protocol attributes of the enclosing callback.
      var schema;
      try {
        schema = JSON.parse(buf.toString());
      } catch (parseErr) {
        // If this fails, it's an encoded schema: decode it and print it as
        // JSON. A decoding error is deliberately left uncaught so that it
        // surfaces with its stack trace and a non-zero exit status.
        console.log(JSON.stringify(metaType.fromBuffer(buf)));
        return;
      }
      // Else if the above succeeded, it's a decoded ("normal") schema:
      // reduce it to its canonical form, then encode that.
      var canonicalAttrs = JSON.parse(avro.parse(schema).getSchema());
      process.stdout.write(metaType.toBuffer(canonicalAttrs));
    });
});
// Helpers.
/**
 * Meta logical type, roughly equivalent to a "named union".
 *
 * Constructor arguments are forwarded unchanged to avsc's `LogicalType`
 * constructor. The list of primitive type names starts out unset and must be
 * supplied through `setSymbols` before `_toValue` is used.
 *
 */
function MetaType(attrs, opts) {
  avro.types.LogicalType.call(this, attrs, opts);

  // Primitive symbols; populated later via `setSymbols`.
  this._symbols = undefined;
}

util.inherits(MetaType, avro.types.LogicalType);
/**
 * Store the list of primitive type names.
 *
 * `_toValue` looks strings up in this list to decide whether they are
 * primitive names or references.
 *
 * @param arr {Array} Primitive type names.
 *
 */
MetaType.prototype.setSymbols = function (arr) {
  this._symbols = arr;
};
/**
 * Unwrap a `Meta` record: return the contents of the first (presumably only)
 * branch of its `value` field, discarding the branch name.
 *
 * @param val {Object} Object with a `value` property holding a single-key
 * wrapper, e.g. `{value: {PrimitiveType: 'int'}}`.
 *
 */
MetaType.prototype._fromValue = function (val) {
  var wrapped = val.value;
  var branchName = Object.keys(wrapped)[0];
  return wrapped[branchName];
};
/**
 * Wrap a schema into the `{value: {<branch>: schema}}` shape, picking the
 * branch name from the schema's JavaScript type:
 *
 * + string found in the primitive symbols: `PrimitiveType`,
 * + any other string: `string` (a reference),
 * + array: `array` (a union),
 * + anything else: the capitalized value of its `type` property.
 *
 * @param any {String|Array|Object} Schema to wrap.
 *
 */
MetaType.prototype._toValue = function (any) {
  var branch = {};
  var isPrimitive;

  if (typeof any === 'string') {
    // Handling primitive names separately from references lets us save a
    // significant amount of bytes (1 byte per type name instead of 5-8).
    isPrimitive = this._symbols.indexOf(any) !== -1;
    branch[isPrimitive ? 'PrimitiveType' : 'string'] = any;
  } else if (any instanceof Array) {
    branch.array = any;
  } else {
    branch[capitalize(any.type)] = any;
  }

  return {value: branch};
};
/**
 * Return the input string with its first character upper-cased.
 *
 * `enum` is a reserved keyword in JavaScript, so the record names are
 * capitalized. It's also more consistent to have all records be capitalized
 * anyway.
 *
 * @param s {String} String to capitalize; an empty string is returned as is.
 *
 */
function capitalize(s) {
  var head = s.charAt(0);
  var tail = s.slice(1);
  return head.toUpperCase() + tail;
}
@mtth
Copy link
Author

mtth commented Jan 31, 2016

For example:

$ wc -c Coupon.avsc
703 # Uncompressed size.
$ node meta.js <Coupon.avsc >Coupon.avsc.bin # Compress.
$ wc -c Coupon.avsc.bin
239 # Compressed size (66% smaller than the original).
$ diff Coupon.avsc <(node meta.js <Coupon.avsc.bin) # Check that we can recover the schema.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment