# nickjevershed/process.py

## process.py
#%%

import pandas as pd
import simplejson as json
from datetime import datetime

#%%

# Build a lookup of bird metadata keyed by "bird<id>" from the exported sheet.
with open('1R3zG-DJRqN7MFLGqjhZs26D1SJcKI7pkd1j1XDwAiIM.json') as birdJson:
    birdsInfo = json.load(birdJson)['sheets']['Sheet1']

newInfo = {}

for bird in birdsInfo:
    # str() mirrors how addBird/addImage build their lookup key, and avoids a
    # TypeError if the sheet ever stores the id as a number.
    birdIndex = 'bird' + str(bird['id'])
    newInfo[birdIndex] = {
        "name": bird['name'],
        "image": "https://interactive.guim.co.uk/embed/aus/2019/bird-pics/" + bird['img'],
    }

df = pd.read_csv("results.csv")

# last_modified_time is parsed as epoch milliseconds, tagged as UTC and then
# converted to Sydney local time.
df['time'] = pd.to_datetime(df['last_modified_time'], unit='ms')
df['time'] = df['time'].dt.tz_localize('UTC').dt.tz_convert('Australia/Sydney')

# Keep rows at or after 2019-10-28 whose `counted` value is 'valid'.
# One combined mask plus .copy() gives an independent frame, so the column
# assignments made later in the script do not raise SettingWithCopyWarning.
df = df[
    (df['time'] >= '2019-10-28')
    & (df['counted'] == 'valid')
].copy()

def addBird(row):
    """Return the bird name for the ``iid`` found in ``row['data']``.

    ``row['data']`` is parsed as JSON and its ``iid`` value is looked up in
    the module-level ``newInfo`` table under the key ``"bird<iid>"``.
    Returns ``None`` when that key is not present.
    """
    payload = json.loads(row['data'])
    key = "bird" + str(payload['iid'])
    if key not in newInfo:
        return None
    return newInfo[key]['name']

def addImage(row):
    """Return the image URL for the ``iid`` found in ``row['data']``.

    ``row['data']`` is parsed as JSON and its ``iid`` value is looked up in
    the module-level ``newInfo`` table under the key ``"bird<iid>"``.
    Returns ``None`` when that key is not present.
    """
    payload = json.loads(row['data'])
    key = "bird" + str(payload['iid'])
    if key not in newInfo:
        return None
    return newInfo[key]['image']

# Attach the bird name and image URL to every row, then add a constant
# `count` column of 1 per row.
for column, lookup in (('name', addBird), ('img', addImage)):
    df[column] = df.apply(lookup, axis=1)
df['count'] = 1


#%%

#test = df[(df['name'] == "Short-tailed shearwater (muttonbird)") | (df['name'] == "Australian magpie")]

# Hourly totals per (name, img): group the rows, bucket `time` into one-hour
# bins and sum the per-row `count` within each bin.
df_10 = (
    df[['name', 'img', 'time', 'count']]
    .groupby(['name', 'img'])
    .resample('h', on='time')
    .sum()
    .reset_index()
)

test = df_10[df_10['name'] == 'Rainbow lorikeet']

# df_10 = df[['name','time','count']].resample('h', on='time').sum().reset_index()

#%%

# Running total of `count` within each bird name, in row order.
# GroupBy.cumsum() returns a Series aligned to df_10's own index. The previous
# .apply(lambda x: x.cumsum()) form gets the group key prepended to its index
# on pandas 2.0+, and the column assignment then fails with an
# incompatible-index error.
df_10['cumulative'] = df_10.groupby(['name'])['count'].cumsum()

#%%

test = df_10[df_10['name'] == "Short-tailed shearwater (muttonbird)"]

#%%

#df_10['time'] = df_10['time'].dt.strftime('%H:%M %d %b')
df_10.to_csv('top-ten.csv')

#%%

# Wide layout: one row per (name, img), one column per `time` value, with the
# `cumulative` figure in each cell.
pvt = (
    df_10
    .pivot_table(index=['name','img'], columns='time', values='cumulative')
    .reset_index()
)


def mapper(x):
    """Format datetime column labels as 'HH:MM DD Mon'; return others as-is."""
    if not isinstance(x, datetime):
        return x
    return x.strftime('%H:%M %d %b')


pvt.columns = pvt.columns.map(mapper)

pvt.to_csv('pivot.csv')
	#%%

# NOTE(review): everything from here on repeats the pipeline defined earlier
# in this file. Its indentation had been flattened, which made the file
# unparseable; the structure is restored below. Consider deleting one copy.

import pandas as pd
import simplejson as json
from datetime import datetime

#%%

# Build a lookup of bird metadata keyed by "bird<id>" from the exported sheet.
with open('1R3zG-DJRqN7MFLGqjhZs26D1SJcKI7pkd1j1XDwAiIM.json') as birdJson:
    birdsInfo = json.load(birdJson)['sheets']['Sheet1']

newInfo = {}

for bird in birdsInfo:
    # str() mirrors how addBird/addImage build their lookup key, and avoids a
    # TypeError if the sheet ever stores the id as a number.
    birdIndex = 'bird' + str(bird['id'])
    newInfo[birdIndex] = {
        "name": bird['name'],
        "image": "https://interactive.guim.co.uk/embed/aus/2019/bird-pics/" + bird['img'],
    }

df = pd.read_csv("results.csv")

# last_modified_time is parsed as epoch milliseconds, tagged as UTC and then
# converted to Sydney local time.
df['time'] = pd.to_datetime(df['last_modified_time'], unit='ms')
df['time'] = df['time'].dt.tz_localize('UTC').dt.tz_convert('Australia/Sydney')

# Keep rows at or after 2019-10-28 whose `counted` value is 'valid'.
# .copy() gives an independent frame so the column assignments below do not
# raise SettingWithCopyWarning.
df = df[
    (df['time'] >= '2019-10-28')
    & (df['counted'] == 'valid')
].copy()


def addBird(row):
    """Return the bird name for the ``iid`` in ``row['data']``, else ``None``."""
    data = json.loads(row['data'])
    index = "bird" + str(data['iid'])
    if index not in newInfo:
        return None
    return newInfo[index]['name']


def addImage(row):
    """Return the image URL for the ``iid`` in ``row['data']``, else ``None``."""
    data = json.loads(row['data'])
    index = "bird" + str(data['iid'])
    if index not in newInfo:
        return None
    return newInfo[index]['image']


df['name'] = df.apply(addBird, axis=1)
df['img'] = df.apply(addImage, axis=1)
df['count'] = 1


#%%

#test = df[(df['name'] == "Short-tailed shearwater (muttonbird)") | (df['name'] == "Australian magpie")]

# Hourly totals per (name, img).
df_10 = df[['name','img','time','count']].groupby(['name','img']).resample('h', on='time').sum().reset_index()

test = df_10[df_10['name'] == 'Rainbow lorikeet']

# df_10 = df[['name','time','count']].resample('h', on='time').sum().reset_index()

#%%

# Running total per bird name. GroupBy.cumsum() stays aligned to df_10's own
# index, whereas .apply(lambda x: x.cumsum()) gets the group key prepended to
# its index on pandas 2.0+ and cannot be assigned back as a column.
df_10['cumulative'] = df_10.groupby(['name'])['count'].cumsum()

#%%

test = df_10[df_10['name'] == "Short-tailed shearwater (muttonbird)"]

#%%

#df_10['time'] = df_10['time'].dt.strftime('%H:%M %d %b')
df_10.to_csv('top-ten.csv')

#%%

# Wide layout: one row per (name, img), one column per `time` value.
pvt = df_10.pivot_table(index=['name','img'], columns='time', values='cumulative').reset_index()

#newCols = pvt.columns
#
#for col in newCols:
#    print(col)

# Datetime column labels become 'HH:MM DD Mon'; other labels pass through.
mapper = lambda x: x.strftime('%H:%M %d %b') if isinstance(x, datetime) else x
pvt.columns = pvt.columns.map(mapper)

pvt.to_csv('pivot.csv')