Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidbradway/61154ea21354b5935a0429a39fdf40c7 to your computer and use it in GitHub Desktop.
Save davidbradway/61154ea21354b5935a0429a39fdf40c7 to your computer and use it in GitHub Desktop.
# conda create -n pd python
# (source) conda activate pd
# conda install pandas
# conda install matplotlib
# to clean up afterwards, remove the pd environment: conda env remove -n pd
import json
import os
import datetime as dt
import pandas as pd
from matplotlib import pyplot as plt
# Folder holding the per-period sub-folders of JSON files, relative to the
# current working directory.
HISTORY_ROOT = os.path.join('Location History', 'Semantic Location History')

# Exact length of the international mile. The segment 'distance' values are
# presumably metres -- TODO confirm against the export format.
METERS_PER_MILE = 1609.344


def collect_trips(root, activity_types=('IN_PASSENGER_VEHICLE', 'CYCLING')):
    """Gather trip distances and start times from semantic location history.

    Walks every sub-folder of *root*, loads each ``*.json`` file in it and
    inspects the ``activitySegment`` entry of every item in the file's
    ``timelineObjects`` list.

    Args:
        root: Path of the folder whose sub-folders contain the JSON files.
        activity_types: ``activityType`` values to collect; segments of any
            other type are ignored.

    Returns:
        A dict mapping each requested activity type to a
        ``(miles, start_timestamps)`` pair of parallel lists. ``miles`` holds
        the segment distance converted to miles; ``start_timestamps`` holds
        the segment's ``duration['startTimestampMs']`` value unchanged.

    Problems are reported with ``print`` rather than raised: a missing
    *root*, an unreadable or malformed file, or a segment lacking a distance
    or start time is skipped without discarding the remaining data.
    """
    trips = {activity: ([], []) for activity in activity_types}
    if not os.path.isdir(root):
        print('Location history folder not found: {}'.format(root))
        return trips

    # Sorted so the resulting lists do not depend on file-system order.
    for directory in sorted(os.listdir(root)):
        dir_path = os.path.join(root, directory)
        if not os.path.isdir(dir_path):
            # Stray files next to the sub-folders would make listdir() fail.
            continue
        for filename in sorted(os.listdir(dir_path)):
            if not filename.endswith('.json'):
                continue
            full_filename = os.path.join(dir_path, filename)
            try:
                # JSON is UTF-8; do not rely on the platform default encoding.
                with open(full_filename, encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                print('{}: {}'.format(full_filename, e))
                continue

            timeline = data.get('timelineObjects') if isinstance(data, dict) else None
            if not isinstance(timeline, list):
                print('{}: no timelineObjects list found'.format(full_filename))
                continue

            for timeline_object in timeline:
                if not isinstance(timeline_object, dict):
                    continue
                segment = timeline_object.get('activitySegment')
                if not isinstance(segment, dict):
                    continue
                activity = segment.get('activityType')
                bucket = trips.get(activity) if isinstance(activity, str) else None
                if bucket is None:
                    continue
                # Read both values before appending either one, so the two
                # parallel lists can never get out of step.
                try:
                    miles = segment['distance'] / METERS_PER_MILE
                    start_ms = segment['duration']['startTimestampMs']
                except (KeyError, TypeError) as e:
                    print('{}: skipping incomplete {} segment ({!r})'.format(
                        full_filename, activity, e))
                    continue
                distances, starts = bucket
                distances.append(miles)
                starts.append(start_ms)
    return trips


_trips = collect_trips(HISTORY_ROOT)
car_trips, when = _trips['IN_PASSENGER_VEHICLE']
bike_trips, when_bike = _trips['CYCLING']
# Per-trip table for car travel: one row per collected segment, built from
# the parallel lists `car_trips` (miles) and `when` (start timestamps).
df = pd.DataFrame()
df['Miles'] = pd.Series(car_trips, dtype=float)
# NOTE: the column keeps its original name, but after the division by 1000 it
# holds seconds since the epoch, not milliseconds.
df['timestampMs'] = pd.Series(when, dtype=object).astype(float) / 1000.
# datetime.fromtimestamp() without a tz argument converts to the machine's
# local time, so trips are assigned to local calendar days. pd.to_datetime
# keeps the column datetime-typed even when there are no trips at all.
df['datetime'] = pd.to_datetime(
    [dt.datetime.fromtimestamp(t) for t in df['timestampMs']])

# Total miles per calendar day; days without a car trip do not appear.
df1 = pd.DataFrame()
df1['Daily Miles'] = df['Miles'].groupby(df['datetime'].dt.to_period('D')).sum()

if df1.empty:
    print('No car trips found; skipping the miles-driven histogram.')
else:
    median_daily_car_miles = df1['Daily Miles'].median()
    df1.hist(bins=50)
    plt.xlabel('Miles Driven per Day')
    plt.ylabel('Number of Days')
    plt.title('Histogram of Miles Driven per Day')
    # Dashed vertical line marks the median day.
    plt.axvline(median_daily_car_miles, color='y', linestyle='dashed', linewidth=1)
    plt.show()
    # A bare expression is discarded when run as a script, so print it.
    print('Median miles driven per day: {:.2f}'.format(median_daily_car_miles))
# Per-trip table for cycling: one row per collected segment, built from the
# parallel lists `bike_trips` (miles) and `when_bike` (start timestamps).
df2 = pd.DataFrame()
df2['Miles'] = pd.Series(bike_trips, dtype=float)
# NOTE: the column keeps its original name, but after the division by 1000 it
# holds seconds since the epoch, not milliseconds.
df2['timestampMs'] = pd.Series(when_bike, dtype=object).astype(float) / 1000.
# datetime.fromtimestamp() without a tz argument converts to the machine's
# local time, so rides are assigned to local calendar days. pd.to_datetime
# keeps the column datetime-typed even when there are no rides at all.
df2['datetime'] = pd.to_datetime(
    [dt.datetime.fromtimestamp(t) for t in df2['timestampMs']])

# Total miles per calendar day; days without a ride do not appear.
df3 = pd.DataFrame()
df3['Daily Miles'] = df2['Miles'].groupby(df2['datetime'].dt.to_period('D')).sum()

if df3.empty:
    print('No bike trips found; skipping the miles-biked histogram.')
else:
    median_daily_bike_miles = df3['Daily Miles'].median()
    df3.hist(bins=50)
    plt.xlabel('Miles Biked per Day')
    plt.ylabel('Number of Days')
    plt.title('Histogram of Miles Biked per Day')
    # Dashed vertical line marks the median day.
    plt.axvline(median_daily_bike_miles, color='y', linestyle='dashed', linewidth=1)
    plt.show()
    # A bare expression is discarded when run as a script, so print it.
    print('Median miles biked per day: {:.2f}'.format(median_daily_bike_miles))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment