Skip to content

Instantly share code, notes, and snippets.

@holysheep
Created August 13, 2022 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save holysheep/be3267c673b9d1463d5e82954e3754dd to your computer and use it in GitHub Desktop.
Save holysheep/be3267c673b9d1463d5e82954e3754dd to your computer and use it in GitHub Desktop.
import json
import time
import urllib.request
import pandas as pd
# Benchmark: two ways of loading a newline-delimited JSON feed into a flat
# pandas DataFrame. Each option is timed end to end (download + parse +
# flatten), so network latency is part of every measurement.
EVENTS_URL = "https://storage.googleapis.com/xcc-de-assessment/events.json"

# Display-only setting (keeps wide frames on one line when printed); set once,
# outside the timed regions, since it has nothing to do with either option.
pd.set_option('display.expand_frame_repr', False)

# Option 1 with urllib.request.urlopen + json.loads + pd.json_normalize
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump if the system clock is adjusted.
start0 = time.perf_counter()
# The context manager closes the HTTP response even if parsing raises.
with urllib.request.urlopen(EVENTS_URL) as response:
    # One JSON document per line; blank lines (e.g. a trailing newline) are
    # skipped instead of crashing json.loads.
    json_objects = [json.loads(line) for line in response if line.strip()]
# json_normalize flattens nested dicts into dotted column names.
df = pd.json_normalize(json_objects)
print(f"Duration of Option 1 is: {time.perf_counter() - start0} seconds")

# Option 2 with pandas read_json
start1 = time.perf_counter()
dataframe = pd.read_json(EVENTS_URL, lines=True)
# Expand the 'event' column into its own frame, one column per top-level key.
# NOTE(review): assumes every 'event' value is a dict - confirm against the feed.
event_values = pd.DataFrame(dataframe['event'].tolist())
df1 = dataframe.drop(columns=['event'])
# Prefix the expanded columns with 'event.' before joining them back.
df2 = event_values.add_prefix('event.')
# Index-aligned join: both frames keep the row order of the original load.
dataframe = df1.join(df2)
print(f"Duration of Option 2 is: {time.perf_counter() - start1} seconds")

# Result (one sample run, recorded with the original time.time() measurement):
# Duration of Option 1 is: 24.125062465667725 seconds
# Duration of Option 2 is: 24.810150384902954 seconds
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment