Last active
February 24, 2018 09:37
-
-
Save RobinL/61612175853458f5746747af51c786b0 to your computer and use it in GitHub Desktop.
testschema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"$schema": "http://json-schema.org/draft-07/schema#", | |
"type": "object", | |
"title": "Metadata", | |
"description": "MoJ Data Catalogue Metadata", | |
"properties": { | |
"id": { | |
"type": "string", | |
"title": "The ID of this table. Unique identifier which enables a table's metadata to refer to another table using e.g. for a foreign key", | |
"examples": [ | |
"my_database_name.my_table_name" | |
] | |
}, | |
"columns": { | |
"type": "array", | |
"title": "The columns in the table. An array of objects", | |
"items": { | |
"type": "object", | |
"properties": { | |
"description": { | |
"type": "string", | |
"title": "A description of this field" | |
}, | |
"name": { | |
"type": "string", | |
"title": "The column name. Should be lower case with underscores, not spaces", | |
"examples": [ | |
"employee_number" | |
] | |
}, | |
"type": { | |
"type": "string", | |
"title": "The data type. We use a limited set of data types for cross compatibility between Spark, R, Pandas etc. See lookup here: https://github.com/moj-analytical-services/dataengineeringutils/blob/master/dataengineeringutils/data/data_type_conversion.csv", | |
"enum": ["character","int","float","boolean","datetime","date"] | |
} | |
} | |
} | |
}, | |
"table_desc": { | |
"type": "string", | |
"title": "A description of what this table contains " | |
}, | |
"table_name": { | |
"type": "string", | |
"title": "The name of the table in the database" | |
}, | |
"data_format": { | |
"type": "string", | |
"title": "The format of the data in s3, and instruction on how to parse, see here https://github.com/moj-analytical-services/dataengineeringutils/blob/ae295caf93c75c80510abf0c74865939c94d3e70/dataengineeringutils/glue.py#L45", | |
"enum": ["avro","csv","csv_quoted_nodate","regex","orc","par","parquet"] | |
}, | |
"location": { | |
"type": "string", | |
"title": "The path to the data in s3. Usually, you should use path relative to the database root directory, unless the database contains tables spread across mutliple buckets or directories", | |
"examples": [ | |
"sop_full/" | |
] | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment