Skip to content

Instantly share code, notes, and snippets.

@edo248
Created September 21, 2019 16:51
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edo248/431cece0ea5dbc7cb7385897f936d175 to your computer and use it in GitHub Desktop.
Save edo248/431cece0ea5dbc7cb7385897f936d175 to your computer and use it in GitHub Desktop.
Parsing json to python models and serializing to json using pydantic
"""
Parsing complex json structure with pydantic.
Using the example JSON file from nobelprize.org
### laureate.json from http://api.nobelprize.org/v1/laureate.json
Original data looked like this:
------------
{
"laureates" : [
{
"id":"1",
"firstname":"Wilhelm Conrad",
"surname":"R\u00f6ntgen",
"born":"1845-03-27",
"died":"1923-02-10",
"bornCountry":"Prussia (now Germany)",
"bornCountryCode":"DE",
"bornCity":"Lennep (now Remscheid)",
"diedCountry":"Germany",
"diedCountryCode":"DE",
"diedCity":"Munich",
"gender":"male",
"prizes":[
{
"year":"1901",
"category":"physics",
"share":"1",
"motivation":"\"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him\"",
"affiliations":[
{
"name":"Munich University",
"city":"Munich",
"country":"Germany"
}
]
}
]
}
]
}
---------------------------
"""
import json
from enum import Enum
from typing import List, Optional

from pydantic import BaseModel
class Category(str, Enum):
    """Closed set of values accepted for a prize's ``category`` field.

    Mixing in ``str`` makes every member compare equal to its raw string
    value, so members can be matched directly against the strings found in
    the JSON payload.
    """

    physics = "physics"
    # NOTE(review): "math" is kept from the original list; confirm the API
    # ever emits it.
    math = "math"
    economics = "economics"
    medicine = "medicine"
    peace = "peace"
    chemistry = "chemistry"
    literature = "literature"
class Prize(BaseModel):
    """One entry of a laureate's ``prizes`` array.

    All fields are required. The sample payload delivers ``year`` and
    ``share`` as JSON strings (``"1901"``, ``"1"``); they are declared as
    ``int`` here so the model holds them as numbers.
    """

    year: int
    # Must be one of the values enumerated by Category.
    category: Category
    share: int
    motivation: str
    # Deliberately left as an untyped list; in the sample payload each item
    # is an object with "name", "city" and "country" keys.
    affiliations: list
class Laureate(BaseModel):
    """A single laureate record; fields without a default are required.

    ``id``, ``firstname`` and ``prizes`` must be present in the input.
    ``surname`` falls back to an empty string and every other attribute
    falls back to ``None`` when the record omits it.
    """

    id: int
    firstname: str
    surname: str = ""
    # Nullable fields are spelled Optional[...] explicitly rather than
    # relying on the implicit "str = None" form, so an explicit JSON null is
    # accepted and type checkers agree with the default.
    born: Optional[str] = None
    died: Optional[str] = None
    bornCountry: Optional[str] = None
    bornCountryCode: Optional[str] = None
    bornCity: Optional[str] = None
    diedCountry: Optional[str] = None
    diedCountryCode: Optional[str] = None
    diedCity: Optional[str] = None
    gender: Optional[str] = None
    prizes: List[Prize]
class Laureates(BaseModel):
    """Top-level document: an object with a single ``laureates`` array."""

    laureates: List[Laureate]
# Load data
# JSON text is UTF-8; state it explicitly instead of relying on the
# platform's default encoding.
with open("laureate.json", encoding="utf-8") as f:
    datax = json.load(f)
laureates_data = Laureates(**datax)

# Try filtering by field values
filtered = [
    laureate
    for laureate in laureates_data.laureates
    if "Albert" in laureate.firstname
]
print("\n".join(f"{laureate.firstname} {laureate.surname}" for laureate in filtered))

# Write data to file
with open("output.json", "w", encoding="utf-8") as f:
    f.write(laureates_data.json())

# Write json schema to file
with open("output_schema.json", "w", encoding="utf-8") as f:
    f.write(laureates_data.schema_json(indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment