Skip to content

Instantly share code, notes, and snippets.

@nickjevershed
Created August 22, 2014 01:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickjevershed/e874646076d3c43110c9 to your computer and use it in GitHub Desktop.
Save nickjevershed/e874646076d3c43110c9 to your computer and use it in GitHub Desktop.
classification of immigration department contracts
import csv
import re
# Keyword list that is not referenced by any code visible in this script; it is
# kept unchanged in case other code relies on it.
fList = ['client','detention','detain','manus','nauru','cocos','keeling','christmas','refugee','unaccompanied','humanitarian','minor','staff accomodation','curtin','villawood','scherger','inverbrackie','derby','construction camp','ita','idc','apod','irh','darwin airport','berrimah','bladin','wickham','phosphate','aqua','lilac','maribyrnong','inverbrackie','serco','transfield','g4s','gsl','toll']
# Search terms used by the filter below. Terms and cell text are both
# lowercased before matching, so the capitalisation here does not matter.
nList = ['pontville','CI','weipa','regional','processing','IMA','tamil','farsi','afghanistan','screening','woomera','yongah']


def classify_contracts(input_path='immigration-contracts.csv',
                       output_path='output.csv',
                       words=None,
                       columns=(1, 9)):
    """Copy the rows of a CSV file that mention any search word.

    The header row is always copied. Every following row is written to the
    output exactly once if any of the inspected cells contains one of the
    search words immediately followed by a word boundary; the first word
    that matches is printed, followed by the row itself.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the CSV file to (over)write.
        words: Search terms, tried in order. Defaults to ``nList``.
        columns: Zero-based indexes of the cells to search in each row.
            Cells missing from a short or blank row are ignored instead of
            raising ``IndexError``.

    Returns:
        The number of data rows written (the header is not counted).
    """
    if words is None:
        words = nList

    # Compile each pattern once rather than once per row.
    # NOTE(review): only a trailing word boundary is required, so a term such
    # as 'ima' also matches the end of a longer word ("Lima"). Confirm whether
    # a leading boundary was intended before tightening this.
    patterns = [(word, re.compile(word.lower() + r'\b')) for word in words]

    matched = 0
    # newline='' is what the csv module expects on Python 3; it replaces the
    # 'rU' mode, which was removed in Python 3.11.
    # NOTE(review): the files are opened with the platform default encoding;
    # confirm the encoding of the input data.
    with open(input_path, newline='') as csvinput, \
            open(output_path, 'w', newline='') as csvoutput:
        reader = csv.reader(csvinput)
        writer = csv.writer(csvoutput, lineterminator='\n')

        # Copy the header row; an empty input simply yields an empty output.
        headers = next(reader, None)
        if headers is None:
            return matched
        writer.writerow(headers)

        for row in reader:
            # Only look at the cells that actually exist in this row.
            texts = [row[i].lower() for i in columns
                     if -len(row) <= i < len(row)]
            for word, pattern in patterns:
                if any(pattern.search(text) for text in texts):
                    print(word)
                    print(row)
                    writer.writerow(row)
                    matched += 1
                    # One hit is enough; never write the same row twice.
                    break
    return matched


if __name__ == '__main__':
    classify_contracts()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment