Skip to content

Instantly share code, notes, and snippets.

@nickjevershed
nickjevershed / pandas-cheatsheat.py
Last active April 25, 2017 00:19
Pandas cheatsheet
# Calculate each item's percentage share of its group total.
# `df` is grouped on its first index level; this entry is independent of the
# one below and expects `df` to exist already.
df.groupby(level=0).apply(lambda x: 100*x/float(x.sum()))

# Group by week, assuming day/month/year date order in the 'date' column.
# parse_dates must be a list (or bool/dict) and dayfirst a real boolean;
# a bare string for parse_dates is rejected by current pandas.
df = pd.read_csv('values.csv', parse_dates=['date'], dayfirst=True, index_col='date')
# The `how=` argument of resample() was removed; call the aggregation on the
# resampler instead. NOTE: this writes the weekly counts over the input file.
df.resample('W').count().to_csv('values.csv')
#transpose a column series to column headers
@nickjevershed
nickjevershed / margaret-and-david-reviews.csv
Last active August 29, 2015 14:10
3206 movie review scores from At the Movies/The Movie Show
We can't make this file beautiful and searchable because it's too large.
user_score,genre1,genre2,studio,title,url,titles_match,year,rtId,indexNo,director,old_title,mpaa_rating,critic_score,lead_actor2,lead_actor1,title-orig,davScore,margScore,dav-marg,combined-score,reviewed-at,url,ratio
93,Drama,Romance,na,Fireworks (Hana-bi),http://www.rottentomatoes.com/m/1091981-fireworks/,no,1997,19686,2366,Takeshi Kitano,Hana-Bi,R,95,Kayoko Kishimoto,Takeshi Kitano,Hana-Bi,5,5,0,10,SBS,,58
76,Drama,Romance,Indiepix,Samson and Delilah,http://www.rottentomatoes.com/m/10011295-samson_and_delilah/,no,2010,770808521,57,Warwick Thornton,Samson & Delilah,Unrated,94,Marissa Gibson,Rowan McNamara,Samson & Delilah,5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s2542612.htm,88
83,Drama,,Warner Independent Pictures,"Good Night, And Good Luck",http://www.rottentomatoes.com/m/1152019-good_night_and_good_luck/,no,2005,8572,55,George Clooney,"Good Night, and Good Luck",PG,93,Patricia Clarkson,David Strathairn,"Good Night, and Good Luck",5,5,0,10,ABC,http://www.abc.net.au/atthemovies/txt/s1532953.htm,10
@nickjevershed
nickjevershed / google-sheets-json.py
Last active October 7, 2023 03:18
Python script to convert a Google spreadsheet to a simple JSON file and save it locally. Assumes your data is on the left-most sheet, i.e. the default. The spreadsheet needs to be 'published to the web'.
import simplejson as json
import requests
# Key of the published spreadsheet to fetch. This one is an example from the
# Victorian election campaign.
key = "1THJ6MgfEk-1egiPFeDuvs4qEi02xTpz4fq9RtO7GijQ"
# Google API request URL for the cells feed, returned as JSON. This first
# request is only made to get nice key values (there is probably a better way).
# NOTE(review): 'od6' presumably selects the left-most worksheet, and this is
# the legacy "feeds" endpoint -- confirm it is still served.
url1 = "".join([
    "https://spreadsheets.google.com/feeds/cells/",
    key,
    "/od6/public/values?alt=json",
])
@nickjevershed
nickjevershed / pollies.py
Created August 29, 2014 01:05
Assigns parties to politicians. Takes a CSV file as input and assumes names are in the first column.
#!/usr/bin/env python
#coding=utf-8
import csv
coalition = ["Chris Pearce","Petro Georgiou","Fran Bailey","Danna Vale","Michael Johnson","Margaret May","Julian McGauran","Mary Jo Fisher","David Hawker","Pat Farmer","Wilson Tuckey","Peter Lindsay","Joanna Gash","Judith Troeth","Alex Somlyay","Alexander Somlyay","Nick Minchin","Nigel Scullion","Alan Ferguson","Russell Trood","Guy Barnett","Chris Back","Eric Abetz","Judith Adams"," Chris Back","Cory Bernardi","Simon Birmingham","Ron Boswell","Ronald Boswell","Sue Boyce","George Brandis","David Bushby","Michaelia Cash","Richard Colbeck","Helen Coonan","Mathias Cormann","Sean Edwards","Alan Eggleston","David Fawcett","Concetta Fierravanti-Wells","Mitch Fifield","Mary Jo Fisher","Bill Heffernan","Gary Humphries","David Johnston","Barnaby Joyce","Helen Kroger","Ian Macdonald","Bridget McKenzie","Brett Mason","Fiona Nash","Marise Payne","Stephen Parry","Michael Ronaldson","Anne Ruston","Scott Ryan","Arthur Sinodinos","Dean Smith","John Williams","Patrick Secker","B
@nickjevershed
nickjevershed / immi-contracts-type.py
Created August 22, 2014 01:36
Classifies immigration department contracts by detention centre type
import csv
import re
# Keyword lists for classifying contracts.
# NOTE(review): presumably fList/nList are terms searched for in contract
# text -- the matching code is not visible here, so confirm how each is used.
# 'inverbrackie' appears twice in fList and 'staff accomodation' is spelled
# as in the original; both are kept unchanged.
fList = [
    'client', 'detention', 'detain', 'manus', 'nauru', 'cocos', 'keeling',
    'christmas', 'refugee', 'unaccompanied', 'humanitarian', 'minor',
    'staff accomodation', 'curtin', 'villawood', 'scherger', 'inverbrackie',
    'derby', 'construction camp', 'ita', 'idc', 'apod', 'irh',
    'darwin airport', 'berrimah', 'bladin', 'wickham', 'phosphate', 'aqua',
    'lilac', 'maribyrnong', 'inverbrackie', 'serco', 'transfield', 'g4s',
    'gsl', 'toll',
]
nList = [
    'pontville', 'CI', 'weipa', 'regional', 'processing', 'IMA', 'tamil',
    'farsi', 'afghanistan', 'screening', 'woomera', 'yongah',
]
# Names treated as offshore locations.
offshore = [
    'manus',
    'nauru',
]
onshore = ['cocos', 'coco', 'christmas', 'CI', 'phosphate', 'aqua', 'lilac','curtin', 'CIDC', 'villawood', 'VIDC', 'sydney irh', 'woomera', 'WIDC', 'scherger', 'SIDC', 'perth', 'PIDC', 'PIRH', 'yongah', 'maribyrnong', 'MIDC', 'melbourne ITA', 'MITA', 'adelaide ITA', 'AITA', 'inverbrackie', 'brisbane ita', 'BITA', 'DIMA', 'bladin', 'berrimah', 'wickham', 'darwin airport', 'weipa', 'derby', 'NIDC', 'northern immigration detention centre', 'd
@nickjevershed
nickjevershed / immi-contracts.py
Created August 22, 2014 01:35
classification of immigration department contracts
import csv
import re
# Keyword lists for classifying contracts.
# NOTE(review): presumably terms searched for in contract text -- the matching
# code is not visible here, so confirm how each list is used.
# 'inverbrackie' appears twice in fList and 'staff accomodation' is spelled
# as in the original; both are kept unchanged.
fList = [
    'client', 'detention', 'detain', 'manus', 'nauru', 'cocos', 'keeling',
    'christmas', 'refugee', 'unaccompanied', 'humanitarian', 'minor',
    'staff accomodation', 'curtin', 'villawood', 'scherger', 'inverbrackie',
    'derby', 'construction camp', 'ita', 'idc', 'apod', 'irh',
    'darwin airport', 'berrimah', 'bladin', 'wickham', 'phosphate', 'aqua',
    'lilac', 'maribyrnong', 'inverbrackie', 'serco', 'transfield', 'g4s',
    'gsl', 'toll',
]
nList = [
    'pontville', 'CI', 'weipa', 'regional', 'processing', 'IMA', 'tamil',
    'farsi', 'afghanistan', 'screening', 'woomera', 'yongah',
]
with open('immigration-contracts.csv','rU') as csvinput:
with open('output.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')
@nickjevershed
nickjevershed / detention-centres.csv
Created August 19, 2014 06:02
List of Australian detention centres and locations
name centre latitude longitude
Brisbane ITA Brisbane_ITA -27.401747 153.104782
Curtin IDC Curtin_IDC -17.38101 123.677216
Maribyrnong IDC Maribyrnong_IDC -37.780035 144.880142
Northern IDC Northern_IDC -12.425709 130.900211
Perth IDC Perth_IDC -31.934562 115.958118
Scherger IDC Scherger_IDC -12.633869 141.888428
Villawood IDC Villawood_IDC -33.878279 150.987339
Christmas Island Christmas_Island -10.488044 105.611572
Melbourne ITA Melbourne_ITA -37.841807 144.952068
@nickjevershed
nickjevershed / expenses-pdf-scraper.py
Created July 23, 2014 09:30
A scraper for getting politicians' travel expenses from PDF
#!/usr/bin/env python
import scraperwiki
import urllib2
import lxml.etree
urls = ["http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ABBOTT_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALBANESE_Anthony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_ALEXANDER_John.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BALDWIN_Bob.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BIRD_Sharon.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BISHOP_Bronwyn.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BOWEN_Chris.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BRADBURY_David.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_BURKE_Tony.pdf","http://www.finance.gov.au/sites/default/files//sites/default/files/P33_CAMERON_Doug.pdf","http://www.finance.gov.au/sites/defa
import simplejson as json
# Load the JSON file and print the headline of its first feature.
with open("blah.json") as f:
    blah = json.load(f)
# The parenthesised form runs as a Python 2 print statement and as a
# Python 3 function call alike.
print(blah['features'][0]['properties']['headline'])
@nickjevershed
nickjevershed / parse_captcha.py
Created June 20, 2014 01:09
OCR for captchas
import sys
import os
import re
import subprocess
import tempfile
from PIL import Image
def parse_captcha(filename):
"""Return the text for thie image using Tesseract