Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Keeping Pandas DataFrames clean when importing JSON
from pandas.io.json import json_normalize
# Flatten the nested JSON in `data` into a DataFrame; nested keys become
# dot-separated column names (e.g. 'properties.city.value').
# NOTE(review): `data` is not defined in this snippet - presumably already
# parsed JSON (dict or list of dicts); confirm against the caller.
df = json_normalize(data)
class DataFrameFromDict(object):
    """Context manager exposing normalized JSON as a scratch DataFrame.

    On construction ``data`` is flattened with ``json_normalize``.  Entering
    the context yields that DataFrame; leaving it drops, in place, every
    column that came from the import, so only the columns created inside the
    ``with`` body remain.

    Caveat: removal is by column label, so a column assigned inside the body
    under the *same* name as an imported column is removed on exit as well.
    """

    def __init__(self, data):
        """Flatten ``data`` and remember which columns were imported.

        Args:
            data: Anything ``json_normalize`` accepts.
        """
        self.df = json_normalize(data)
        # Snapshot the labels now: columns added later by the caller are not
        # in this list and therefore survive ``__exit__``.
        self.columns = list(self.df.columns)

    def __enter__(self):
        """Return the working DataFrame for use inside the ``with`` body."""
        return self.df

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Drop the imported columns in place.

        Returns ``None``, so an exception raised in the ``with`` body is not
        suppressed.
        """
        # errors="ignore": the caller may already have deleted or renamed an
        # imported column inside the body; that must not raise KeyError here.
        self.df.drop(self.columns, axis=1, inplace=True, errors="ignore")
from pandas.io.json import json_normalize
# Flatten the nested JSON in `data` into a DataFrame.
df = json_normalize(data)
# Prefix every imported column with 'temp_' so it can be recognised (and
# removed) once the derived columns have been built.
df.columns = ['temp_' + c for c in df.columns]
# Pre-processing, basic calculations, etc.
df['company_id'] = df['temp_companyId']
df['location'] = df['temp_properties.city.value']
df['name'] = df['temp_properties.name.value']
df['domain'] = df['temp_properties.website.value']
# ... .apply(), .astype(int), whatever ...
# Remove the temporary columns in place ...
df.drop([c for c in df.columns if c.startswith('temp_')], axis=1, inplace=True)
# ... or, alternatively, re-select only the non-temporary columns.
df = df[[c for c in df.columns if not c.startswith('temp_')]]
from pandas.io.json import json_normalize
# Flatten the nested JSON in `data` into a DataFrame.
df = json_normalize(data)
# Derive cleanly named columns from the imported ones. The imported columns
# stay in `df` alongside the new ones - nothing removes them here.
df['company_id'] = df['companyId']
df['location'] = df['properties.city.value']
df['name'] = df['properties.name.value']
df['domain'] = df['properties.website.value']
# ... .apply(), .astype(int), whatever ...
with DataFrameFromDict(companies) as df:
    # The imported dict is now in df, same result as json_normalize.
    df['company_id'] = df['companyId']
    df['location'] = df['properties.city.value']
    df['name'] = df['properties.name.value']
    df['domain'] = df['properties.website.value']
# After the context exits, df contains company_id, location, name, and domain,
# but no more temporary (imported) columns.
print(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment