Created
July 20, 2021 14:15
-
-
Save danyx23/30e290e0e3fe1cf93e6583e06ef54634 to your computer and use it in GitHub Desktop.
Jupyter notebook to validate all OWID grapher configs against a given JSON schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Grapher json-schema test\n", | |
"\n", | |
"This script checks all grapher configs in the db against the provided schema.json\n", | |
"\n", | |
"This notebook uses Lars' [mysql jupyter notebook magic](https://gist.github.com/larsyencken/935452e59246a58c647dd3953de2ab07)" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"source": [ | |
"import pandas as pd\n", | |
"import json\n", | |
"from jsonschema import validate\n", | |
"from dataclasses import dataclass\n" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"source": [ | |
"schemapath = \"./schema experiments/schema.json\"" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"source": [ | |
"schema = None\n", | |
"with open(schemapath, \"r\") as f:\n", | |
" schema = json.load(f)" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"source": [ | |
"# Declare the default dataframe so autocomplete works\n", | |
"df : pd.DataFrame = pd.DataFrame()" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"source": [ | |
"%%mysql\n", | |
"select config from charts" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"source": [ | |
"df" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" config\n", | |
"0 {\"id\": 16, \"map\": {\"colorScale\": {\"baseColorSc...\n", | |
"1 {\"id\": 20, \"map\": {\"colorScale\": {\"baseColorSc...\n", | |
"2 {\"id\": 26, \"map\": {\"colorScale\": {\"baseColorSc...\n", | |
"3 {\"id\": 27, \"map\": {\"colorScale\": {\"baseColorSc...\n", | |
"4 {\"id\": 29, \"map\": {\"mode\": \"specific\", \"maxYea...\n", | |
"... ...\n", | |
"4184 {\"map\": {\"colorScale\": {\"baseColorScheme\": \"Yl...\n", | |
"4185 {\"map\": {\"colorScale\": {\"baseColorScheme\": \"Yl...\n", | |
"4186 {\"data\": {\"availableEntities\": [\"United Kingdo...\n", | |
"4187 {\"id\": 5071, \"data\": {\"availableEntities\": [\"U...\n", | |
"4188 {\"id\": 5072, \"data\": {\"availableEntities\": [\"S...\n", | |
"\n", | |
"[4189 rows x 1 columns]" | |
], | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>config</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>{\"id\": 16, \"map\": {\"colorScale\": {\"baseColorSc...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>{\"id\": 20, \"map\": {\"colorScale\": {\"baseColorSc...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>{\"id\": 26, \"map\": {\"colorScale\": {\"baseColorSc...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>{\"id\": 27, \"map\": {\"colorScale\": {\"baseColorSc...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>{\"id\": 29, \"map\": {\"mode\": \"specific\", \"maxYea...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4184</th>\n", | |
" <td>{\"map\": {\"colorScale\": {\"baseColorScheme\": \"Yl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4185</th>\n", | |
" <td>{\"map\": {\"colorScale\": {\"baseColorScheme\": \"Yl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4186</th>\n", | |
" <td>{\"data\": {\"availableEntities\": [\"United Kingdo...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4187</th>\n", | |
" <td>{\"id\": 5071, \"data\": {\"availableEntities\": [\"U...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4188</th>\n", | |
" <td>{\"id\": 5072, \"data\": {\"availableEntities\": [\"S...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>4189 rows × 1 columns</p>\n", | |
"</div>" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 59 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"source": [ | |
"df[\"configjson\"] = df.config.map(lambda val: json.loads(val))" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"source": [ | |
"validate(df.configjson[0], schema)" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"source": [ | |
"@dataclass\n", | |
"class ConfigValidationError:\n", | |
" id: int\n", | |
" error: Exception" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"source": [ | |
"successes = []\n", | |
"failures = []\n", | |
"for config in df.configjson:\n", | |
" id = config.get(\"id\", None)\n", | |
" try: \n", | |
" validate(config, schema)\n", | |
" successes.append(id)\n", | |
" except Exception as e:\n", | |
" failures.append(ConfigValidationError(id=id, error=e))" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"source": [ | |
"len(successes)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"4181" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 64 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"source": [ | |
"len(failures)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"8" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 65 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"source": [ | |
"failures" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[ConfigValidationError(id=195, error=<ValidationError: \"'-3000' is not of type 'integer'\">),\n", | |
" ConfigValidationError(id=264, error=<ValidationError: \"'1950' is not one of ['latest']\">),\n", | |
" ConfigValidationError(id=271, error=<ValidationError: \"'2000' is not one of ['latest']\">),\n", | |
" ConfigValidationError(id=330, error=<ValidationError: \"'' is not one of ['absolute', 'relative', 'grouped', 'stacked', None]\">),\n", | |
" ConfigValidationError(id=332, error=<ValidationError: \"'' is not one of ['absolute', 'relative', 'grouped', 'stacked', None]\">),\n", | |
" ConfigValidationError(id=334, error=<ValidationError: \"'' is not one of ['absolute', 'relative', 'grouped', 'stacked', None]\">),\n", | |
" ConfigValidationError(id=538, error=<ValidationError: \"'2015' is not one of ['latest']\">),\n", | |
" ConfigValidationError(id=796, error=<ValidationError: \"'2007' is not one of ['latest']\">)]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 66 | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"source": [ | |
"[ item.error.absolute_path for item in failures ]" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[deque(['map', 'minYear']),\n", | |
" deque(['map', 'targetYear']),\n", | |
" deque(['map', 'targetYear']),\n", | |
" deque(['stackMode']),\n", | |
" deque(['stackMode']),\n", | |
" deque(['stackMode']),\n", | |
" deque(['map', 'defaultYear']),\n", | |
" deque(['map', 'defaultYear'])]" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 67 | |
} | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"interpreter": { | |
"hash": "40c188d7f09e73b6baff016c76fe93234271a5f8c00107ec837ff90530bba4cc" | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3.9.1 64-bit ('base': conda)" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment