Skip to content

Instantly share code, notes, and snippets.

@charanhu
Created December 27, 2021 11:39
Show Gist options
  • Save charanhu/aa62af6fb73e737c46c291de02458a74 to your computer and use it in GitHub Desktop.
Save charanhu/aa62af6fb73e737c46c291de02458a74 to your computer and use it in GitHub Desktop.
# credits : https://www.kaggle.com/julian3833/1-quick-start-read-csv-and-flatten-json-fields
def load_df(csv_path, nrows=None):
    """Read a CSV file and expand its JSON-encoded columns into flat columns.

    The columns named in ``JSON_COLUMNS`` are parsed with ``json.loads``
    while the file is read. Each of them is then replaced by the columns
    that ``json_normalize`` produces for it, renamed to
    ``"<column>.<subcolumn>"``.

    Args:
        csv_path: Path of the CSV file to load.
        nrows: Number of rows to read; passed unchanged to ``pd.read_csv``
            (defaults to ``None``).

    Returns:
        The loaded DataFrame with every JSON column replaced by its
        flattened sub-columns.
    """
    # Columns whose cells hold JSON text.
    JSON_COLUMNS = ['device', 'geoNetwork', 'totals', 'trafficSource']

    # Every JSON column gets json.loads as its converter, so each cell is
    # already a parsed object when the frame is built.
    json_converters = dict.fromkeys(JSON_COLUMNS, json.loads)
    df = pd.read_csv(
        csv_path,
        converters=json_converters,
        dtype={'fullVisitorId': 'str'},  # read the visitor id as a string
        nrows=nrows,
    )

    for column in tqdm(JSON_COLUMNS):
        # One row of flat sub-columns per row of the original column.
        flattened = json_normalize(df[column])
        # Prefix sub-column names with their parent column name.
        flattened.columns = [
            "{0}.{1}".format(column, subcolumn)
            for subcolumn in flattened.columns
        ]
        # Swap the raw JSON column for its flattened form, aligned on index.
        remaining = df.drop(column, axis=1)
        df = remaining.merge(flattened, right_index=True, left_index=True)

    file_name = os.path.basename(csv_path)
    print("Loaded {0}. Shape: {1}".format(file_name, df.shape))
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment