An Avro schema to describe Avro schemas.
Last active
April 15, 2016 14:35
-
-
Save mtth/fb87030c8cc142730d15 to your computer and use it in GitHub Desktop.
Avro meta type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Avro IDL describing Avro schemas themselves (see the `Meta` record below).
protocol Meta {

  // Single-symbol enums: each one is used as the `type` field of the
  // same-named record inside `Meta`'s union, so that field can only ever hold
  // that one symbol.
  enum ArrayType { array }
  enum EnumType { enum }
  enum FixedType { fixed }
  enum MapType { map }

  // Names of the primitive types; a primitive schema is stored as one of
  // these symbols rather than as a string.
  enum PrimitiveType { boolean, bytes, double, float, int, long, null, string }

  enum RecordType { record }

  /**
   * Meta type to represent any type.
   *
   * The logical type annotation will "unwrap" this type's `value` union field
   * to match Avro schema definitions.
   *
   * It currently only represents canonical schemas, but it would be
   * straightforward to add more information (e.g. field order or defaults).
   *
   */
  @logicalType("meta")
  record Meta {
    // One branch per kind of schema.
    union {
      PrimitiveType,
      record Array {
        ArrayType type;
        Meta items;
      },
      record Enum {
        string name;
        EnumType type;
        array<string> symbols;
      },
      record Fixed {
        string name;
        FixedType type;
        int size;
      },
      record Map {
        MapType type;
        Meta values;
      },
      record Record {
        string name;
        RecordType type;
        array<record Field { string name; Meta type; }> fields;
      },
      string, // Reference.
      array<Meta> // Union.
    } value;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* jshint node: true */ | |
'use strict'; | |
/**
 * Script to compress/decompress canonical schemas.
 *
 * Usage:
 *
 *   node meta.js <schema.avsc # Compression
 *   node meta.js <schema.bin # Decompression
 *
 * The direction is detected from the input itself: if stdin parses as JSON it
 * is treated as a schema to compress, otherwise as an encoded schema to
 * decompress.
 *
 * This typically achieves compression ratios of 50%-90%.
 *
 */
var avro = require('avsc'), | |
util = require('util'); | |
// Entry point: assemble the meta protocol from its IDL file, then compress or
// decompress whatever arrives on stdin (the direction is detected below).
avro.assemble('./meta.avdl', function (err, attrs) {
  if (err) {
    console.trace(err.message);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
    return;
  }

  // Generate the type from the IDL.
  var protocol = avro.parse(attrs, {logicalTypes: {meta: MetaType}});
  var metaType = protocol.getType('Meta');
  // Primitive names get their own (compact) union branch when encoding.
  metaType.setSymbols(protocol.getType('PrimitiveType').getSymbols());

  // Process input: buffer all of stdin, then act once it is closed.
  var bufs = [];
  process.stdin
    .on('data', function (buf) { bufs.push(buf); })
    .on('end', function () {
      var buf = Buffer.concat(bufs);
      // Named `schema` (not `attrs`) so it doesn't shadow the assembled
      // protocol attributes received by the enclosing callback.
      var schema;
      try {
        schema = JSON.parse(buf.toString());
      } catch (parseErr) {
        // If this fails, it's an encoded schema.
        console.log(JSON.stringify(metaType.fromBuffer(buf)));
        return;
      }
      // Else if the above succeeded, it's a decoded ("normal") schema.
      var canonicalAttrs = JSON.parse(avro.parse(schema).getSchema());
      process.stdout.write(metaType.toBuffer(canonicalAttrs));
    });
});
// Helpers. | |
/**
 * Meta logical type, roughly equivalent to a "named union".
 *
 * Construction is delegated to `avro.types.LogicalType`; the instance then
 * gets an unset `_symbols` slot, which `setSymbols` fills in later.
 *
 * @param attrs Forwarded unchanged to the `LogicalType` constructor.
 * @param opts Forwarded unchanged to the `LogicalType` constructor.
 */
function MetaType(attrs, opts) {
  avro.types.LogicalType.call(this, attrs, opts);
  this._symbols = undefined; // Primitive symbols.
}
// Link the prototype chain so that `MetaType` instances inherit the methods
// defined on `avro.types.LogicalType.prototype`.
util.inherits(MetaType, avro.types.LogicalType);
/**
 * Store the list of names that `_toValue` should treat as primitive types.
 *
 * The array is kept by reference (not copied).
 *
 * @param arr Array of primitive type names.
 */
MetaType.prototype.setSymbols = function (arr) {
  this._symbols = arr;
};
/**
 * Unwrap a decoded value back into a plain schema.
 *
 * `val.value` is expected to be an object with a single key (the name of the
 * union branch); the value stored under that first key is returned as is.
 *
 * @param val Object with a `value` property holding the wrapped branch.
 * @return The content of the first (only) key of `val.value`.
 */
MetaType.prototype._fromValue = function (val) {
  var obj = val.value;
  return obj[Object.keys(obj)[0]];
};
/**
 * Wrap a plain schema into the `{value: {<branch>: schema}}` shape.
 *
 * The branch is selected as follows:
 *
 * + a string listed in the primitive symbols => `PrimitiveType`;
 * + any other string => `string` (a reference);
 * + an array => `array` (a union);
 * + anything else => its `type` property, capitalized (e.g. `Record`).
 *
 * @param any Schema to wrap: a string, an array, or an object with a string
 * `type` property.
 * @return Object with a single `value` property holding the wrapped schema.
 */
MetaType.prototype._toValue = function (any) {
  // If `setSymbols` was never called, fall back to an empty list rather than
  // throwing: primitive names are then simply wrapped as plain strings.
  var symbols = this._symbols || [];
  var obj;
  if (typeof any === 'string') {
    if (symbols.indexOf(any) !== -1) {
      // Handling primitive names separately from references lets us save a
      // significant amount of bytes (1 byte per type name instead of 5-8).
      obj = {PrimitiveType: any};
    } else {
      obj = {string: any};
    }
  } else if (Array.isArray(any)) {
    // `Array.isArray` also recognizes arrays created in another realm, which
    // `instanceof Array` does not.
    obj = {array: any};
  } else {
    obj = {};
    obj[capitalize(any.type)] = any;
  }
  return {value: obj};
};
// `enum` is a reserved keyword in JavaScript, so we capitalize the names. It's
// also more consistent to have all records be capitalized anyway.
/**
 * Return `s` with its first character upper-cased; the rest is unchanged.
 *
 * An empty string is returned as is.
 *
 * @param s String to capitalize.
 */
function capitalize(s) {
  return s.charAt(0).toUpperCase() + s.slice(1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For example: