Skip to content

Instantly share code, notes, and snippets.

@chewxy
Created October 31, 2012 15:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chewxy/3987739 to your computer and use it in GitHub Desktop.
Save chewxy/3987739 to your computer and use it in GitHub Desktop.
import bisect
import csv
import datetime
# Load phase: read the survival CSV and collect the integer found in column 3
# of every row, both globally (`minutes`) and grouped by the language string
# found in column 1 (`langDict`).
#
# Written so that it runs unchanged on Python 2.6+ and Python 3: each print
# call receives a single pre-formatted string, and the header is skipped with
# the builtin next().
#extract minutes then sort
readStart = datetime.datetime.now()
minutes = []
langDict = {}  # language -> ascending list of minute values
with open('Survival_lang_no_title.csv') as survivalFile:
    reader = csv.reader(survivalFile)
    next(reader)  # pop the first (header) row
    for row in reader:
        # csv.reader yields short/empty lists for blank or truncated lines;
        # skip them the same way rows with a non-integer minute are skipped.
        if len(row) < 4:
            continue
        language = row[1]
        try:
            minute = int(row[3])
        except ValueError:
            continue
        # Record each value exactly once per row.  Appending and sorting once
        # after the loop avoids both the duplicate entry and the quadratic
        # cost of an insort per row.
        langDict.setdefault(language, []).append(minute)
        minutes.append(minute)

# The frequency step bisects these lists, so each must be sorted ascending.
for langMinutes in langDict.values():
    langMinutes.sort()
print('reading csv %s' % (datetime.datetime.now() - readStart,))

# De-duplicate but keep ascending order: the next step samples every 30th
# value, which is only meaningful on a sorted sequence.
minutes = sorted(set(minutes))
print('%s %s %s' % (len(minutes), max(minutes), min(minutes)))
# Frequency phase: for every 30th entry of `minutes`, count per language how
# many of that language's recorded values are strictly greater than the
# sampled value.  Result shape: resultDict[minute][language] -> count.
#
# bisect requires every list in `langDict` to be sorted ascending.
freqStart = datetime.datetime.now()
resultDict = {}
for cutoff in minutes[::30]:  # `cutoff`, not `min`: keep the builtin usable
    perLanguage = {}
    for language, langMinutes in langDict.items():
        # bisect (bisect_right) returns the index just past the last value
        # <= cutoff, so everything from that index on is > cutoff.
        position = bisect.bisect(langMinutes, cutoff)
        # Subtract instead of slicing: same count, no temporary list copy.
        perLanguage[language] = len(langMinutes) - position
    resultDict[cutoff] = perLanguage
print('calculating frequencies %s' % (datetime.datetime.now() - freqStart,))
# Output phase: dump `resultDict` (minute -> {language: count}) as one
# "min, lang, unanswered" row per (minute, language) pair.
#
# Rows are written in ascending minute order, then by language, so repeated
# runs over the same data produce an identical file.
# NOTE(review): fields are joined by hand with ', ' and are not quoted, so a
# language value containing a comma would yield a malformed row -- confirm
# the input never contains one.
outStart = datetime.datetime.now()
# `with` guarantees the file is flushed and closed even if a write fails.
with open('niceSurvivalFreqJump30.csv', 'w') as fout:
    fout.write('min, lang, unanswered\n')
    for cutoff in sorted(resultDict):
        counts = resultDict[cutoff]
        for language in sorted(counts):
            fout.write('%s, %s, %s\n' % (cutoff, language, counts[language]))
print('output! %s' % (datetime.datetime.now() - outStart,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment