Instantly share code, notes, and snippets.

@Ladsgroup Ladsgroup/parser.py
Created Jan 27, 2018

Embed
What would you like to do?
Clickstream_parser
# License: MIT
import gzip
def search(name, i, path='clickstream-enwiki-2017-12.tsv.gz'):
    """Print the top rows of a gzipped TSV whose column ``i`` equals ``name``.

    The file is streamed line by line. Every row whose ``i``-th
    tab-separated field is exactly ``name`` is kept, the kept rows are
    ordered by the integer in column 3 (largest first; ties keep file
    order), and the first 20 are printed followed by ``Total: <sum>``,
    where the sum covers *all* kept rows, not only the printed ones.

    Args:
        name: Exact field value to look for.
        i: Zero-based index of the column compared against ``name``.
        path: Path of the gzip-compressed, UTF-8 encoded TSV file.
            Defaults to the file name the script was written for.

    Raises:
        ValueError: If a matching row's column 3 is not an integer.
        OSError: If the file cannot be opened or is not valid gzip data.
    """
    # Column 3 is parsed as an integer and drives both the ordering and
    # the total (presumably the click count -- confirm against the dump).
    count_column = 3
    matches = []
    # newline='\n' makes text-mode iteration split lines exactly where
    # iterating the binary stream would, so rows are unchanged.
    with gzip.open(path, 'rt', encoding='utf-8', newline='\n') as f:
        for line in f:
            line = line.rstrip('\n')
            fields = line.split('\t')
            # Blank or truncated rows cannot be matched or counted; skip
            # them instead of failing with IndexError.
            if len(fields) <= max(i, count_column):
                continue
            if fields[i] == name:
                # Parse the count once here rather than re-splitting the
                # row again for the sort key and for the total.
                matches.append((int(fields[count_column]), line))
    # Sort on the count only, so the stable sort keeps file order for ties.
    matches.sort(key=lambda pair: pair[0], reverse=True)
    total = sum(count for count, _ in matches)
    print('\n'.join(row for _, row in matches[:20]))
    print('Total: ' + str(total))
# Titles to look up in the dump.
all_ = [
    'Hassan_Rouhani',
    'Iran',
    'Ali_Khamenei',
    'Alexis_Texas',
    'Homosexuality',
    'Same-sex_marriage',
]

# For every title, report the rows matching it in column 0 and then the
# rows matching it in column 1, with a separator line after each report.
for case in all_:
    for column in (0, 1):
        search(case, column)
        print('-------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment