Created
August 13, 2022 19:33
-
-
Save holysheep/be3267c673b9d1463d5e82954e3754dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import time
import urllib.request

import pandas as pd

# Benchmark script: load the same line-delimited JSON events file into a
# flattened pandas DataFrame in two different ways and print how long each
# approach takes.

# Single source of truth for the dataset both options download.
EVENTS_URL = "https://storage.googleapis.com/xcc-de-assessment/events.json"

# Display-only setting; applied once up front so it is not counted in either
# timed section.
pd.set_option('display.expand_frame_repr', False)

# Option 1: stream the HTTP response with urllib and parse it line by line.
# perf_counter() is used rather than time() because it is monotonic and meant
# for measuring elapsed intervals.
start0 = time.perf_counter()
# The context manager closes the HTTP response even if parsing a line fails.
with urllib.request.urlopen(EVENTS_URL) as response:
    # Iterating the response yields one line at a time; each line is parsed
    # as its own JSON document.
    json_objects = [json.loads(json_object) for json_object in response]
# json_normalize flattens nested objects into dotted column names.
df = pd.json_normalize(json_objects)
print("Duration of Option 1 is: {} seconds".format(time.perf_counter() - start0))

# Option 2: let pandas download and parse the file, then flatten the 'event'
# column by hand.
start1 = time.perf_counter()
dataframe = pd.read_json(EVENTS_URL, lines=True)
# Expand each value of the 'event' column into its own set of columns.
event_values = pd.DataFrame(dataframe['event'].values.tolist())
df1 = dataframe.drop(columns=['event'])
# Prefix the expanded columns with 'event.' before joining them back.
df2 = event_values.add_prefix('event.')
# join() aligns on the index, which both frames inherit from read_json's rows.
dataframe = df1.join(df2)
print("Duration of Option 2 is: {} seconds".format(time.perf_counter() - start1))

# Result (as recorded by the original author, measured with time.time()):
# Duration of Option 1 is: 24.125062465667725 seconds
# Duration of Option 2 is: 24.810150384902954 seconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment