Nick Evershed nickjevershed

## pandas-cheatsheat.py
# Calculate each item's percentage of its group's total.
# Expects `df` to be indexed so that index level 0 identifies the group.

df.groupby(level=0).apply(lambda x: 100*x/float(x.sum()))

# Group by week, assuming day/month/year date order.
# parse_dates takes a list of column names and dayfirst takes a real boolean.

df = pd.read_csv('values.csv', parse_dates=['date'], dayfirst=True, index_col='date')
# resample() no longer accepts how=; call the aggregation on the resampler instead.
# NOTE: this writes the weekly counts back over the input file.
df.resample('W').count().to_csv('values.csv')

#transpose a column series to column headers

## margaret-and-david-reviews.csv
user_score,genre1,genre2,studio,title,url,titles_match,year,rtId,indexNo,director,old_title,mpaa_rating,critic_score,lead_actor2,lead_actor1,title-orig,davScore,margScore,dav-marg,combined-score,reviewed-at,url,ratio
93,Drama,Romance,na,Fireworks (Hana-bi),http://www.rottentomatoes.com/m/1091981-fireworks/,no,1997,19686,2366,Takeshi Kitano,Hana-Bi,R,95,Kayoko Kishimoto,Takeshi Kitano,Hana-Bi,5,5,0,10,SBS,,58
76,Drama,Romance,Indiepix,Samson and Delilah,http://www.rottentomatoes.com/m/10011295-samson_and_delilah/,no,2010,770808521,57,Warwick Thornton,Samson & Delilah,Unrated,94,Marissa Gibson,Rowan McNamara,Samson & Delilah,5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s2542612.htm,88
83,Drama,,Warner Independent Pictures,"Good Night, And Good Luck",http://www.rottentomatoes.com/m/1152019-good_night_and_good_luck/,no,2005,8572,55,George Clooney,"Good Night, and Good Luck",PG,93,Patricia Clarkson,David Strathairn,"Good Night, and Good Luck",5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s1532953.htm,10

## google-sheets-json.py
import simplejson as json
import requests

# Spreadsheet key goes here. This one is an example from the Victorian election campaign.

key = "1THJ6MgfEk-1egiPFeDuvs4qEi02xTpz4fq9RtO7GijQ"

# Google API request URLs - the first one is only used to get nice key values
# (there's probably a better way to do this).

url1 = "".join((
    "https://spreadsheets.google.com/feeds/cells/",
    key,
    "/od6/public/values?alt=json",
))

## pollies.py
#!/usr/bin/env python
#coding=utf-8

import csv

coalition = ["Chris Pearce","Petro Georgiou","Fran Bailey","Danna Vale","Michael Johnson","Margaret May","Julian McGauran","Mary Jo Fisher","David Hawker","Pat Farmer","Wilson Tuckey","Peter Lindsay","Joanna Gash","Judith Troeth","Alex Somlyay","Alexander Somlyay","Nick Minchin","Nigel Scullion","Alan Ferguson","Russell Trood","Guy Barnett","Chris Back","Eric Abetz","Judith Adams"," Chris Back","Cory Bernardi","Simon Birmingham","Ron Boswell","Ronald Boswell","Sue Boyce","George Brandis","David Bushby","Michaelia Cash","Richard Colbeck","Helen Coonan","Mathias Cormann","Sean Edwards","Alan Eggleston","David Fawcett","Concetta Fierravanti-Wells","Mitch Fifield","Mary Jo Fisher","Bill Heffernan","Gary Humphries","David Johnston","Barnaby Joyce","Helen Kroger","Ian Macdonald","Bridget McKenzie","Brett Mason","Fiona Nash","Marise Payne","Stephen Parry","Michael Ronaldson","Anne Ruston","Scott Ryan","Arthur Sinodinos","Dean Smith","John Williams","Patrick Secker","B

## immi-contracts-type.py
import csv
import re

fList = ['client','detention','detain','manus','nauru','cocos','keeling','christmas','refugee','unaccompanied','humanitarian','minor','staff accomodation','curtin','villawood','scherger','inverbrackie','derby','construction camp','ita','idc','apod','irh','darwin airport','berrimah','bladin','wickham','phosphate','aqua','lilac','maribyrnong','inverbrackie','serco','transfield','g4s','gsl','toll']

nList = ['pontville','CI','weipa','regional','processing','IMA','tamil','farsi','afghanistan','screening','woomera','yongah']

offshore = ['manus', 'nauru']

onshore = ['cocos', 'coco', 'christmas', 'CI', 'phosphate', 'aqua', 'lilac','curtin', 'CIDC', 'villawood', 'VIDC', 'sydney irh', 'woomera', 'WIDC', 'scherger', 'SIDC', 'perth', 'PIDC', 'PIRH', 'yongah', 'maribyrnong', 'MIDC', 'melbourne ITA', 'MITA', 'adelaide ITA', 'AITA', 'inverbrackie', 'brisbane ita', 'BITA', 'DIMA', 'bladin', 'berrimah', 'wickham', 'darwin airport', 'weipa', 'derby', 'NIDC', 'northern immigration detention centre', 'd

## immi-contracts.py
import csv
import re

fList = ['client','detention','detain','manus','nauru','cocos','keeling','christmas','refugee','unaccompanied','humanitarian','minor','staff accomodation','curtin','villawood','scherger','inverbrackie','derby','construction camp','ita','idc','apod','irh','darwin airport','berrimah','bladin','wickham','phosphate','aqua','lilac','maribyrnong','inverbrackie','serco','transfield','g4s','gsl','toll']

nList = ['pontville','CI','weipa','regional','processing','IMA','tamil','farsi','afghanistan','screening','woomera','yongah']

with open('immigration-contracts.csv','rU') as csvinput:
	with open('output.csv', 'w') as csvoutput:
		writer = csv.writer(csvoutput, lineterminator='\n')

## detention-centres.csv

          
            name
            centre
            latitude
            longitude

            
              Brisbane ITA
              Brisbane_ITA
              -27.401747
              153.104782

            
              Curtin IDC
              Curtin_IDC
              -17.38101
              123.677216

            
              Maribyrnong IDC
              Maribyrnong_IDC
              -37.780035
              144.880142

            
              Northern IDC
              Northern_IDC
              -12.425709
              130.900211

            
              Perth IDC
              Perth_IDC
              -31.934562
              115.958118

            
              Scherger IDC
              Scherger_IDC
              -12.633869
              141.888428

            
              Villawood IDC
              Villawood_IDC
              -33.878279
              150.987339

            
              Christmas Island
              Christmas_Island
              -10.488044
              105.611572

            
              Melbourne ITA
              Melbourne_ITA
              -37.841807
              144.952068

## expenses-pdf-scraper.py
#!/usr/bin/env python
import scraperwiki
import urllib2
import lxml.etree

urls = ["http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ABBOTT_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALBANESE_Anthony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALEXANDER_John.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BALDWIN_Bob.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BIRD_Sharon.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BISHOP_Bronwyn.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BOWEN_Chris.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BRADBURY_David.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BURKE_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_CAMERON_Doug.pdf","http://www.finance.gov.au/sites/defa

## blah.py
import simplejson as json

# Load the JSON document, closing the file as soon as it has been parsed.
with open("blah.json") as f:
    blah = json.load(f)

# Print the headline of the first feature. The single-argument call form
# of print works on both Python 2 and Python 3.
print(blah['features'][0]['properties']['headline'])

## parse_captcha.py
import sys
import os
import re
import subprocess
import tempfile
from PIL import Image


def parse_captcha(filename):
    """Return the text for thie image using Tesseract
	#calculate percentage of grouped items

	df.groupby(level=0).apply(lambda x: 100*x/float(x.sum()))

	#groupby week, assuming day/month/year date order

	df = pd.read_csv('values.csv', parse_dates='date', dayfirst='true', index_col='date')
	df.resample('w', how='count').to_csv('values.csv')

	#transpose a column series to column headers
	user_score,genre1,genre2,studio,title,url,titles_match,year,rtId,indexNo,director,old_title,mpaa_rating,critic_score,lead_actor2,lead_actor1,title-orig,davScore,margScore,dav-marg,combined-score,reviewed-at,url,ratio
	93,Drama,Romance,na,Fireworks (Hana-bi),http://www.rottentomatoes.com/m/1091981-fireworks/,no,1997,19686,2366,Takeshi Kitano,Hana-Bi,R,95,Kayoko Kishimoto,Takeshi Kitano,Hana-Bi,5,5,0,10,SBS,,58
	76,Drama,Romance,Indiepix,Samson and Delilah,http://www.rottentomatoes.com/m/10011295-samson_and_delilah/,no,2010,770808521,57,Warwick Thornton,Samson & Delilah,Unrated,94,Marissa Gibson,Rowan McNamara,Samson & Delilah,5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s2542612.htm,88
	83,Drama,,Warner Independent Pictures,"Good Night, And Good Luck",http://www.rottentomatoes.com/m/1152019-good_night_and_good_luck/,no,2005,8572,55,George Clooney,"Good Night, and Good Luck",PG,93,Patricia Clarkson,David Strathairn,"Good Night, and Good Luck",5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s1532953.htm,10
	import simplejson as json
	import requests

	#your spreadsheet key here. I'm using an example from the Victorian election campaign

	key = "1THJ6MgfEk-1egiPFeDuvs4qEi02xTpz4fq9RtO7GijQ"

	#google api request urls - I'm doing the first one just to get nice key values (there's probably a better way to do this)

	url1 = "https://spreadsheets.google.com/feeds/cells/" + key + "/od6/public/values?alt=json"
	#!/usr/bin/env python
	#coding=utf-8

	import csv

	coalition = ["Chris Pearce","Petro Georgiou","Fran Bailey","Danna Vale","Michael Johnson","Margaret May","Julian McGauran","Mary Jo Fisher","David Hawker","Pat Farmer","Wilson Tuckey","Peter Lindsay","Joanna Gash","Judith Troeth","Alex Somlyay","Alexander Somlyay","Nick Minchin","Nigel Scullion","Alan Ferguson","Russell Trood","Guy Barnett","Chris Back","Eric Abetz","Judith Adams"," Chris Back","Cory Bernardi","Simon Birmingham","Ron Boswell","Ronald Boswell","Sue Boyce","George Brandis","David Bushby","Michaelia Cash","Richard Colbeck","Helen Coonan","Mathias Cormann","Sean Edwards","Alan Eggleston","David Fawcett","Concetta Fierravanti-Wells","Mitch Fifield","Mary Jo Fisher","Bill Heffernan","Gary Humphries","David Johnston","Barnaby Joyce","Helen Kroger","Ian Macdonald","Bridget McKenzie","Brett Mason","Fiona Nash","Marise Payne","Stephen Parry","Michael Ronaldson","Anne Ruston","Scott Ryan","Arthur Sinodinos","Dean Smith","John Williams","Patrick Secker","B
	import csv
	import re

	fList = ['client','detention','detain','manus','nauru','cocos','keeling','christmas','refugee','unaccompanied','humanitarian','minor','staff accomodation','curtin','villawood','scherger','inverbrackie','derby','construction camp','ita','idc','apod','irh','darwin airport','berrimah','bladin','wickham','phosphate','aqua','lilac','maribyrnong','inverbrackie','serco','transfield','g4s','gsl','toll']

	nList = ['pontville','CI','weipa','regional','processing','IMA','tamil','farsi','afghanistan','screening','woomera','yongah']

	offshore = ['manus', 'nauru']

	onshore = ['cocos', 'coco', 'christmas', 'CI', 'phosphate', 'aqua', 'lilac','curtin', 'CIDC', 'villawood', 'VIDC', 'sydney irh', 'woomera', 'WIDC', 'scherger', 'SIDC', 'perth', 'PIDC', 'PIRH', 'yongah', 'maribyrnong', 'MIDC', 'melbourne ITA', 'MITA', 'adelaide ITA', 'AITA', 'inverbrackie', 'brisbane ita', 'BITA', 'DIMA', 'bladin', 'berrimah', 'wickham', 'darwin airport', 'weipa', 'derby', 'NIDC', 'northern immigration detention centre', 'd
name	centre	latitude	longitude
Brisbane ITA	Brisbane_ITA	-27.401747	153.104782
Curtin IDC	Curtin_IDC	-17.38101	123.677216
Maribyrnong IDC	Maribyrnong_IDC	-37.780035	144.880142
Northern IDC	Northern_IDC	-12.425709	130.900211
Perth IDC	Perth_IDC	-31.934562	115.958118
Scherger IDC	Scherger_IDC	-12.633869	141.888428
Villawood IDC	Villawood_IDC	-33.878279	150.987339
Christmas Island	Christmas_Island	-10.488044	105.611572
Melbourne ITA	Melbourne_ITA	-37.841807	144.952068
	#!/usr/bin/env python
	import scraperwiki
	import urllib2
	import lxml.etree

	urls = ["http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ABBOTT_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALBANESE_Anthony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALEXANDER_John.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BALDWIN_Bob.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BIRD_Sharon.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BISHOP_Bronwyn.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BOWEN_Chris.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BRADBURY_David.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BURKE_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_CAMERON_Doug.pdf","http://www.finance.gov.au/sites/defa
	import simplejson as json

	with open("blah.json") as f:
	blah = json.load(f)

	print blah['features'][0]['properties']['headline']
	import sys
	import os
	import re
	import subprocess
	import tempfile
	from PIL import Image


	def parse_captcha(filename):
	"""Return the text for thie image using Tesseract