Last active
April 25, 2019 11:23
-
-
Save edsu/51504c2817d609d0387031ec8ca6ea61 to your computer and use it in GitHub Desktop.
List the jobs mentioned with #5jobsivehad : https://docs.google.com/spreadsheets/d/1cOwIUIWk5kfVz29v3SujBjTkzQK9paOKOggDhk7MlTE/edit?usp=sharing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Tally the jobs listed in #5jobsivehad tweets and write them to jobs.csv.

Each tweet's text is scanned line by line for numbered list entries
("1. dishwasher", "2) barista", "3 librarian", ...). Every entry is
lower-cased and counted; the counts are written to ``jobs.csv`` with the
most frequent job first.
"""

import re
import csv
import json
import string
import collections

# A list entry: a digit 1-5, at most one punctuation character, a single
# space, then the job text. The punctuation is passed through re.escape so
# that every character is taken literally inside the class; unescaped, the
# backslash in string.punctuation only served to escape the following "]"
# and was itself never matched.
JOB_LINE = re.compile(
    r'^[1-5][' + re.escape(string.punctuation) + r']? (.+)'
)

# Entries that merely repeat the hashtag phrase ("5 jobs i've had") are not
# jobs. The "." stands in for whichever apostrophe character was typed.
HASHTAG_PHRASE = re.compile("jobs i.ve had")


def extract_jobs(text):
    """Return the lower-cased job names found in one tweet's text.

    Args:
        text: The tweet text; list entries are expected one per line,
            separated by newline characters.

    Returns:
        A list of job strings in the order they appear. Lines that are not
        numbered 1-5, and entries containing the hashtag phrase, are left out.
    """
    found = []
    for line in text.split('\n'):
        m = JOB_LINE.match(line)
        if not m:
            continue
        job = m.group(1).lower()
        if HASHTAG_PHRASE.search(job):
            continue
        found.append(job)
    return found


def count_jobs(tweets):
    """Count job mentions across an iterable of tweets.

    Args:
        tweets: Iterable of mappings. Each one is read through its
            ``'full_text'`` key; any mapping that has a
            ``'retweeted_status'`` key is skipped so that retweets do not
            inflate the counts.

    Returns:
        A ``collections.Counter`` mapping job name to number of mentions.
    """
    jobs = collections.Counter()
    for tweet in tweets:
        if 'retweeted_status' in tweet:
            continue
        jobs.update(extract_jobs(tweet['full_text']))
    return jobs


def write_counts(jobs, path='jobs.csv'):
    """Write the job counts to a CSV file, most common job first.

    Args:
        jobs: A ``collections.Counter`` of job name to count.
        path: Destination file; defaults to ``jobs.csv`` in the current
            directory.
    """
    # newline='' is what the csv module asks for (it writes its own line
    # endings), and an explicit encoding keeps non-ASCII job names from
    # depending on the platform default. The with-block closes the file.
    with open(path, 'w', newline='', encoding='utf-8') as handle:
        output = csv.writer(handle)
        output.writerow(['job', 'count'])
        for job, count in jobs.most_common():
            output.writerow([job, count])


def main():
    """Search Twitter for the hashtag and dump the job tally to jobs.csv."""
    # Imported here so the parsing helpers above can be used without the
    # third-party twarc package being installed.
    import twarc

    twitter = twarc.Twarc()
    write_counts(count_jobs(twitter.search('5jobsivehad')))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment