Skip to content

Instantly share code, notes, and snippets.

@rafbarr
Created May 17, 2016 21:19
Show Gist options
  • Save rafbarr/d795033be8073fac6ee1e1ce325ba2ec to your computer and use it in GitHub Desktop.
Save rafbarr/d795033be8073fac6ee1e1ce325ba2ec to your computer and use it in GitHub Desktop.
#!/bin/bash
//usr/bin/env groovy -cp "$(dirname "$0")" "$0" "$@"; exit $?
// Polyglot launcher: bash executes the line above (the leading '//' collapses to '/'), which
// re-runs this file with Groovy using the script's own directory as classpath so that sibling
// classes can be resolved, and then exits with Groovy's status. Groovy itself sees that line
// as a comment. "$@" is quoted so that arguments containing spaces reach Groovy intact.
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0')
import org.apache.avro.SchemaValidatorBuilder
// Command-line definition. All three value options are mandatory: without the two schema
// directories there is nothing to compare, so the parser is left to report a missing one
// instead of letting the script fail later with an unrelated error.
def cli = new CliBuilder(stopAtNonOption: false)
cli._(
    longOpt: 'reference-schemas', args: 1, argName: 'dir', required: true,
    'directory with the reference schemas'
)
cli._(
    longOpt: 'schemas-to-check', args: 1, argName: 'dir', required: true,
    'directory with the schemas to check'
)
cli.m(
    longOpt: 'mode', args: 1, argName: 'mode', required: true,
    'compatibility mode: backwards, forwards or full'
)
cli.h(longOpt: 'help', 'show this message')

// Help is handled before parsing: parse() rejects a command line that lacks the required
// options, which would otherwise turn a plain '--help' into an error.
if ('-h' in args || '--help' in args) {
    cli.usage()
    System.exit(0)
}

def cliOptions = cli.parse(args)
if (!cliOptions) {
    // parse() yields a falsy result for an invalid command line
    System.exit(1)
}

if (!(cliOptions.m in ['backwards', 'forwards', 'full'])) {
    println "error: Unknown compatibility mode: ${cliOptions.m}"
    cli.usage()
    System.exit(1)
}
// Loads every schema file below the given directory, keeps only the named schemas (the types
// listed in AvroHelpers.NAMED_SCHEMA_TYPES) and indexes them by their full name.
def loadNamedSchemasByFullName = { dir ->
    def namedSchemas = AvroHelpers.loadSchemasFromFiles(new File(dir)).findAll { file, schema ->
        schema.type in AvroHelpers.NAMED_SCHEMA_TYPES
    }
    namedSchemas.collectEntries { file, schema -> [schema.fullName, schema] }
}

def referenceSchemas
def schemasToCheck
try {
    referenceSchemas = loadNamedSchemasByFullName(cliOptions.'reference-schemas')
    schemasToCheck = loadNamedSchemasByFullName(cliOptions.'schemas-to-check')
} catch (e) {
    // any loading or parsing problem is reported by its message only and aborts the run
    println e.message
    System.exit(1)
}
def shouldCheckForBackwardsCompat = cliOptions.m in ['backwards', 'full']
def shouldCheckForForwardsCompat = cliOptions.m in ['forwards', 'full']

// backwards: the checked schema must be able to read data written with the reference schema
def backwardsCompatChecker = new SchemaValidatorBuilder().canReadStrategy().validateLatest()
// forwards: data written with the checked schema must be readable with the reference schema
def forwardsCompatChecker = new SchemaValidatorBuilder().canBeReadStrategy().validateLatest()

// A reference schema that no longer exists is only treated as an error when backwards
// compatibility was requested.
def missingSchemas = referenceSchemas.keySet().minus(schemasToCheck.keySet())
if (missingSchemas && shouldCheckForBackwardsCompat) {
    println "can't find the following schemas: ${missingSchemas.collect { "'$it'" }.join(', ')}"
    System.exit(1)
}

// The checks selected by the compatibility mode, in the order they are reported.
def enabledChecks = []
if (shouldCheckForBackwardsCompat) {
    enabledChecks << [label: 'backwards', checker: backwardsCompatChecker]
}
if (shouldCheckForForwardsCompat) {
    enabledChecks << [label: 'forwards', checker: forwardsCompatChecker]
}

// findAll (unlike any) visits every schema, so all incompatibilities are reported in a single
// run instead of stopping at the first failing schema.
def incompatibleSchemas = schemasToCheck.findAll { name, schema ->
    if (!referenceSchemas.containsKey(name)) {
        return false // brand new schema: nothing to be compatible with
    }
    def failed = false
    enabledChecks.each { check ->
        try {
            check.checker.validate(schema, [referenceSchemas[name]])
        } catch (e) {
            println "failed ${check.label} compatibility check for '$name' ${schema.type.name}"
            failed = true
        }
    }
    failed
}

// exit status 1 when at least one schema failed a check, 0 otherwise
System.exit(incompatibleSchemas ? 1 : 0)
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0')
@Grab(group = 'org.yaml', module = 'snakeyaml', version = '1.17')
import groovy.io.FileType
import groovy.json.JsonOutput
import groovy.transform.stc.ClosureParams
import groovy.transform.stc.FirstParam
import org.apache.avro.Schema
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.error.YAMLException
/**
 * Helpers to load and parse sets of Avro schemas that may reference each other by name.
 */
class AvroHelpers {

    /** The Avro schema types that carry a full name. */
    static final Set<Schema.Type> NAMED_SCHEMA_TYPES = [
        Schema.Type.FIXED,
        Schema.Type.ENUM,
        Schema.Type.RECORD
    ].toSet().asImmutable()

    /** Matches the parser error raised when a schema references a name that is not known yet. */
    private static final UNDEFINED_NAME_PATTERN = ~/"(.+)" is not a defined name.*/

    /**
     * Parses a set of raw schemas resolving any names.
     *
     * A raw schema is anything that can be transformed into a valid Avro schema string. For
     * instance, a {@link File} instance holding an Avro schema could be converted by simply reading
     * its content.
     *
     * Parsing is done in passes: every pass tries to parse each remaining raw schema with a parser
     * that already knows all the named schemas parsed so far. A raw schema that fails because of
     * an undefined name is retried in the next pass; passes repeat until everything is parsed or
     * until a pass is unable to parse anything.
     *
     * @param <T> the type of the raw schema
     * @param rawSchemas a set of raw schemas
     * @param schemaAdaptor a closure mapping a raw schema into a valid Avro schema string
     * @return a mapping from the raw schema to the parsed schema
     * @throws RuntimeException if a raw schema fails to parse for any reason other than an
     *     undefined name, or if the remaining undefined names can never be resolved
     */
    static <T> Map<T, Schema> parse(
        Set<T> rawSchemas,
        @ClosureParams(value = FirstParam.FirstGenericType.class)
        Closure<String> schemaAdaptor = Closure.IDENTITY
    ) {
        def remainingRawSchemas = rawSchemas.collect() // defensive copy
        def schemasByRawSchema = [:]
        def schemasByName = [:]

        while (!remainingRawSchemas.isEmpty()) {
            def undefinedNames = new HashSet()

            // removeAll reports whether at least one raw schema was parsed (and removed)
            def madeProgress = remainingRawSchemas.removeAll { rawSchema ->
                // bootstrap the parser with previously parsed named schemas
                def schemaParser = new Schema.Parser().addTypes(schemasByName)
                try {
                    schemasByRawSchema[rawSchema] = schemaParser.parse(schemaAdaptor(rawSchema))
                    // only adopt the parser's types once the whole raw schema parsed cleanly
                    schemasByName = schemaParser.types
                    return true
                } catch (e) {
                    def undefinedNameMatcher = e.message =~ UNDEFINED_NAME_PATTERN
                    if (!undefinedNameMatcher.matches()) {
                        throw new RuntimeException(
                            "parsing error '${e.message}' for schema '$rawSchema'", e
                        )
                    }
                    undefinedNames << undefinedNameMatcher.group(1)
                    return false
                }
            }

            // A pass that parsed nothing cannot be followed by a more successful one, since the
            // set of known names did not grow. Progress is tracked directly (rather than by
            // comparing the reported names with the known full names) because a schema may
            // reference a type by a name that differs from the full name it is registered under.
            if (!madeProgress) {
                throw new RuntimeException(
                    "couldn't resolve names: ${undefinedNames.collect { "'$it'" }.join(',')}"
                )
            }
        }

        schemasByRawSchema
    }

    /**
     * Loads and parses all *.yaml and *.avsc Avro schema files from a directory.
     *
     * YAML files are converted to JSON before being handed to the Avro parser; *.avsc files are
     * used as they are. Files are read as UTF-8.
     *
     * @param rootDir the directory to load the Avro schemas from
     * @return a mapping from the Avro schema file to the parsed schema
     * @throws IllegalArgumentException if {@code rootDir} is not a directory
     * @throws RuntimeException if a file is not valid YAML or its schema can't be parsed
     */
    static Map<File, Schema> loadSchemasFromFiles(File rootDir) {
        if (!rootDir.isDirectory()) {
            throw new IllegalArgumentException("'$rootDir' is not a valid directory")
        }

        def schemaFiles = new HashSet()
        rootDir.traverse(type: FileType.FILES, nameFilter: ~/.*\.(yaml|avsc)/) {
            schemaFiles << it
        }

        // NOTE(review): Yaml.load with the default constructor honours YAML type tags; that is
        // fine for trusted schema repositories, but consider a SafeConstructor if the schema
        // files can come from untrusted sources.
        def yamlParser = new Yaml()

        parse(schemaFiles) { schemaFile ->
            def content = schemaFile.getText('UTF-8')
            if (!schemaFile.path.endsWith(".yaml")) {
                return content
            }
            try {
                return JsonOutput.toJson(yamlParser.load(content))
            } catch (YAMLException e) {
                throw new RuntimeException(
                    "invalid YAML file '${schemaFile.path}': ${e.message}", e
                )
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment