Skip to content

Instantly share code, notes, and snippets.

@htahir1
Created March 10, 2019 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save htahir1/37480d4902d8c2f0032146016fa8a9bc to your computer and use it in GitHub Desktop.
Save htahir1/37480d4902d8c2f0032146016fa8a9bc to your computer and use it in GitHub Desktop.
Comparison of JSON libraries for loading JSON into a pandas DataFrame
import pandas as pd
from time import time
from pprint import pprint
import simplejson
import json
import ujson
def _loads(data, serializer):
t_start = time()
serialized_data = serializer.loads(data)
t_serialization = time() - t_start
df = pd.DataFrame.from_dict(serialized_data)
t_end = time() - t_start
return {
"duration": t_end,
"columns": df.columns,
"shape": df.shape,
"serialization": t_serialization
}
# Read the benchmark input once so every library parses the same string.
with open("big.json", "r") as infile:
    data = infile.read()

# Run the identical load through each JSON library and report its timings.
# print(...) with a single string argument produces the same output on
# Python 2 and Python 3.
for method in [simplejson, json, ujson]:
    result = _loads(data, method)
    print("method: " + method.__name__)
    print("total: " + str(result["duration"]))
    print("serialization: " + str(result["serialization"]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment