# charanhu/json_normalize.py

## json_normalize.py
# credits : https://www.kaggle.com/julian3833/1-quick-start-read-csv-and-flatten-json-fields

def load_df(csv_path, nrows=None):
    """Read a CSV file and expand its JSON-encoded columns into flat columns.

    The columns ``device``, ``geoNetwork``, ``totals`` and ``trafficSource``
    are decoded with ``json.loads`` while the file is read. Each of them is
    then flattened with ``pandas.json_normalize`` and replaced by the
    resulting columns, named ``"<column>.<subcolumn>"``.

    Args:
        csv_path: Path of the CSV file to read.
        nrows: Number of rows to read; ``None`` (the default) reads them all.

    Returns:
        A ``pandas.DataFrame`` in which the four JSON columns have been
        replaced by their flattened sub-columns.

    Side effects:
        Prints the base name of the file and the shape of the result.
    """
    # Imported here so the function does not rely on module-level imports.
    import json
    import os

    import pandas as pd

    try:
        from tqdm import tqdm as progress
    except ImportError:
        # The progress bar is purely cosmetic; fall back to plain iteration.
        progress = iter

    # Columns whose cells hold a JSON document serialized as text.
    JSON_COLUMNS = ['device', 'geoNetwork', 'totals', 'trafficSource']

    df = pd.read_csv(
        csv_path,
        # Decode every JSON cell into a Python object while parsing.
        converters={column: json.loads for column in JSON_COLUMNS},
        # Read the visitor id as text instead of letting pandas infer a
        # numeric type (presumably to keep the identifier's digits intact).
        dtype={'fullVisitorId': 'str'},
        nrows=nrows,
    )

    for column in progress(JSON_COLUMNS):
        # One row per record, one column per (possibly nested) JSON key.
        # A plain list yields a fresh 0..n-1 index, matching read_csv's.
        column_as_df = pd.json_normalize(df[column].tolist())

        # Prefix with the source column so names from different JSON
        # columns cannot collide.
        column_as_df.columns = [
            "{0}.{1}".format(column, subcolumn)
            for subcolumn in column_as_df.columns
        ]

        # Swap the raw JSON column for its flattened form, aligning rows
        # on the index.
        df = df.drop(column, axis=1).merge(
            column_as_df, right_index=True, left_index=True
        )

    print("Loaded {0}. Shape: {1}".format(os.path.basename(csv_path), df.shape))

    return df
	# credits : https://www.kaggle.com/julian3833/1-quick-start-read-csv-and-flatten-json-fields

	def load_df(csv_path, nrows=None):
		"""Read a CSV file and expand its JSON-encoded columns into flat columns.

		The columns ``device``, ``geoNetwork``, ``totals`` and ``trafficSource``
		are decoded with ``json.loads`` while the file is read. Each of them is
		then flattened with ``pandas.json_normalize`` and replaced by the
		resulting columns, named ``"<column>.<subcolumn>"``.

		Args:
			csv_path: Path of the CSV file to read.
			nrows: Number of rows to read; ``None`` (the default) reads them all.

		Returns:
			A ``pandas.DataFrame`` in which the four JSON columns have been
			replaced by their flattened sub-columns.

		Side effects:
			Prints the base name of the file and the shape of the result.
		"""
		# Imported here so the function does not rely on module-level imports.
		import json
		import os

		import pandas as pd

		try:
			from tqdm import tqdm as progress
		except ImportError:
			# The progress bar is purely cosmetic; fall back to plain iteration.
			progress = iter

		# Columns whose cells hold a JSON document serialized as text.
		JSON_COLUMNS = ['device', 'geoNetwork', 'totals', 'trafficSource']

		df = pd.read_csv(
			csv_path,
			# Decode every JSON cell into a Python object while parsing.
			converters={column: json.loads for column in JSON_COLUMNS},
			# Read the visitor id as text instead of letting pandas infer a
			# numeric type (presumably to keep the identifier's digits intact).
			dtype={'fullVisitorId': 'str'},
			nrows=nrows,
		)

		for column in progress(JSON_COLUMNS):
			# One row per record, one column per (possibly nested) JSON key.
			# A plain list yields a fresh 0..n-1 index, matching read_csv's.
			column_as_df = pd.json_normalize(df[column].tolist())

			# Prefix with the source column so names from different JSON
			# columns cannot collide.
			column_as_df.columns = [
				"{0}.{1}".format(column, subcolumn)
				for subcolumn in column_as_df.columns
			]

			# Swap the raw JSON column for its flattened form, aligning rows
			# on the index.
			df = df.drop(column, axis=1).merge(
				column_as_df, right_index=True, left_index=True
			)

		print("Loaded {0}. Shape: {1}".format(os.path.basename(csv_path), df.shape))

		return df