Skip to content

Instantly share code, notes, and snippets.

@ryanwitt
Created December 5, 2012 20:44
Show Gist options
  • Save ryanwitt/4219323 to your computer and use it in GitHub Desktop.
Save ryanwitt/4219323 to your computer and use it in GitHub Desktop.
# Tally how many rows of a three-column CSV mention each value in the first
# ("from") and second ("to") column, then write both tallies to disk ranked
# from most to least frequent.
#
# Written to run unchanged on Python 2.6+ and Python 3: every print call has
# exactly one argument, so the output is identical under either interpreter.
import traceback
from operator import itemgetter

INPUT_PATH = 'refer.2011.csv'
FROMS_PATH = 'froms.csv'
TOS_PATH = 'tos.csv'
PROGRESS_INTERVAL = 1000000


def count_endpoints(lines, progress_interval=PROGRESS_INTERVAL):
    """Count row occurrences of each first- and second-column value.

    Args:
        lines: iterable of text lines, each expected to hold exactly three
            comma-separated fields.
        progress_interval: print a progress line every this many rows.

    Returns:
        A ``(froms, tos)`` pair of dicts mapping a field value to the number
        of well-formed rows in which it appeared in that column.

    Lines that do not split into exactly three fields are reported with a
    traceback on stderr and skipped; processing continues.
    """
    froms = {}
    tos = {}
    for i, line in enumerate(lines):
        try:
            # NOTE(review): the third column is parsed but never used, so
            # each row contributes 1 regardless of its value. Confirm that a
            # row tally (rather than a sum of that column) is intended.
            fr, to, _unused = line.strip().split(',')
        except ValueError:
            # Only a wrong field count can fail here. Catching ValueError
            # instead of everything keeps Ctrl-C working on long runs.
            traceback.print_exc()
        else:
            froms[fr] = froms.get(fr, 0) + 1
            tos[to] = tos.get(to, 0) + 1
        # `i > 1` suppresses the report at i == 0, which would otherwise
        # satisfy the modulo test.
        if i > 1 and i % progress_interval == 0:
            print('%d %d' % (i, len(froms)))
    return froms, tos


def rank_by_count(counts):
    """Return ``counts`` as (key, count) pairs, highest count first."""
    return sorted(counts.items(), key=itemgetter(1), reverse=True)


def write_ranked(counts, path):
    """Write ``counts`` to ``path`` as ``key,count`` lines, highest first."""
    with open(path, 'w') as out:
        out.writelines(
            key + ',' + str(total) + '\n'
            for key, total in rank_by_count(counts)
        )


def main():
    """Run the tally over INPUT_PATH and write both ranked output files."""
    # The context manager closes the input even if counting is interrupted.
    with open(INPUT_PATH) as source:
        froms, tos = count_endpoints(source)
    print(len(froms))
    print('sorting froms...')
    write_ranked(froms, FROMS_PATH)
    print('sorting tos...')
    write_ranked(tos, TOS_PATH)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment