Created
December 27, 2021 11:39
-
-
Save charanhu/aa62af6fb73e737c46c291de02458a74 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# credits: https://www.kaggle.com/julian3833/1-quick-start-read-csv-and-flatten-json-fields
def load_df(csv_path, nrows=None):
    """Read a CSV file and expand its JSON-encoded columns into flat columns.

    The 'device', 'geoNetwork', 'totals' and 'trafficSource' columns are
    decoded with ``json.loads`` while the file is read. Each of them is then
    replaced by the columns ``json_normalize`` produces for it, renamed to
    ``<json column>.<sub-field>``.

    Args:
        csv_path: Location of the CSV file; given to ``pd.read_csv`` and to
            ``os.path.basename`` for the summary message.
        nrows: Forwarded unchanged to ``pd.read_csv`` (default ``None``).

    Returns:
        The resulting DataFrame, with the four JSON columns replaced by
        their flattened sub-columns.
    """
    json_fields = ['device', 'geoNetwork', 'totals', 'trafficSource']

    df = pd.read_csv(
        csv_path,
        # Decode each JSON column into Python objects at read time.
        converters=dict.fromkeys(json_fields, json.loads),
        # 'fullVisitorId' is read as a string rather than a number.
        dtype={'fullVisitorId': 'str'},
        nrows=nrows,
    )

    for field in tqdm(json_fields):
        # One flat column per key found in this field's JSON objects.
        flattened = json_normalize(df[field])
        # Prefix every sub-column with the name of the column it came from.
        flattened.columns = [
            "{0}.{1}".format(field, sub_field)
            for sub_field in flattened.columns
        ]
        # Swap the raw JSON column for its flattened form, joined on the index.
        remaining = df.drop(columns=field)
        df = remaining.merge(flattened, left_index=True, right_index=True)

    file_name = os.path.basename(csv_path)
    print("Loaded {0}. Shape: {1}".format(file_name, df.shape))
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment