Skip to content

Instantly share code, notes, and snippets.

@nickjevershed
Created November 7, 2019 03:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickjevershed/79d1034d919897a9986d402dd96936e7 to your computer and use it in GitHub Desktop.
Save nickjevershed/79d1034d919897a9986d402dd96936e7 to your computer and use it in GitHub Desktop.
#%%
import pandas as pd
import simplejson as json
from datetime import datetime
#%%
# Load the bird sheet and index it by "bird<id>" so each vote row can be
# matched to a display name and image URL.
# The encoding is explicit so decoding does not depend on the platform default
# (the sheet export is expected to be UTF-8 JSON).
with open('1R3zG-DJRqN7MFLGqjhZs26D1SJcKI7pkd1j1XDwAiIM.json', encoding='utf-8') as birdJson:
    birdsInfo = json.load(birdJson)['sheets']['Sheet1']

newInfo = {}
for bird in birdsInfo:
    # str() mirrors the key built in addBird/addImage ("bird" + str(iid)) and
    # avoids a TypeError when the sheet stores the id as a number.
    birdIndex = 'bird' + str(bird['id'])
    newInfo[birdIndex] = {
        "name": bird['name'],
        "image": "https://interactive.guim.co.uk/embed/aus/2019/bird-pics/" + bird['img'],
    }
# Read the raw vote rows and derive a Sydney-local timestamp from the
# millisecond epoch in 'last_modified_time'.
df = pd.read_csv("results.csv")
utc_time = pd.to_datetime(df['last_modified_time'], unit='ms').dt.tz_localize('UTC')
df['time'] = utc_time.dt.tz_convert('Australia/Sydney')

# Keep only rows on or after 2019-10-28 whose 'counted' flag is 'valid'.
on_or_after_start = df['time'] >= '2019-10-28'
is_valid = df['counted'] == 'valid'
df = df[on_or_after_start & is_valid]
def addBird(row):
    """Return the bird name for a vote row, or None if its id is unknown.

    The row's 'data' field is a JSON string; its 'iid' value is turned into
    a "bird<iid>" key and looked up in the module-level ``newInfo`` dict.
    """
    payload = json.loads(row['data'])
    key = "bird" + str(payload['iid'])
    return newInfo[key]['name'] if key in newInfo else None
def addImage(row):
    """Return the bird image URL for a vote row, or None if its id is unknown.

    The row's 'data' field is a JSON string; its 'iid' value is turned into
    a "bird<iid>" key and looked up in the module-level ``newInfo`` dict.
    """
    payload = json.loads(row['data'])
    key = "bird" + str(payload['iid'])
    return newInfo[key]['image'] if key in newInfo else None
# Attach the looked-up bird name and image URL to every vote row (row-wise
# apply), then add a constant 1 so votes can be summed when aggregating.
for column, lookup in (('name', addBird), ('img', addImage)):
    df[column] = df.apply(lookup, axis=1)
df['count'] = 1
#%%
# Hourly vote totals per (name, img) pair.
# Only 'count' is aggregated: summing the whole resampled frame makes the
# result depend on how the installed pandas version treats the text columns
# 'name' and 'img', which can then clash with reset_index().
df_10 = (
    df[['name', 'img', 'time', 'count']]
    .groupby(['name', 'img'])
    .resample('h', on='time')['count']
    .sum()
    .reset_index()
)
# Scratch view of a single bird; `test` is not read anywhere else in the script.
test = df_10[df_10['name'] == 'Rainbow lorikeet']
#%%
# Running total of the hourly counts within each bird, in df_10's row order.
# GroupBy.cumsum() returns a Series on df_10's own index, so it can be assigned
# directly. Wrapping cumsum in groupby.apply() is avoided because newer pandas
# releases prepend the group key to the result index, which breaks assignment.
df_10['cumulative'] = df_10.groupby('name')['count'].cumsum()
#%%
# Scratch view of a single bird; `test` is not read anywhere else in the script.
test = df_10[df_10['name'] == "Short-tailed shearwater (muttonbird)"]
#%%
# Long-format export: one row per name/img pair per hour.
df_10.to_csv('top-ten.csv')
#%%
# Wide format: one row per name/img pair, one column per hour, cells holding
# the cumulative count.
pvt = df_10.pivot_table(values='cumulative', index=['name', 'img'], columns='time')
pvt = pvt.reset_index()


def mapper(x):
    """Format datetime column labels as 'HH:MM DD Mon'; pass other labels through."""
    if isinstance(x, datetime):
        return x.strftime('%H:%M %d %b')
    return x


pvt.columns = pvt.columns.map(mapper)
pvt.to_csv('pivot.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment