Skip to content

Instantly share code, notes, and snippets.

@edsu
Last active April 25, 2019 11:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edsu/51504c2817d609d0387031ec8ca6ea61 to your computer and use it in GitHub Desktop.
Save edsu/51504c2817d609d0387031ec8ca6ea61 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import re
import csv
import json
import twarc
import string
import collections
# Matches one numbered list entry such as "1. barista", "2) cashier" or
# "3 lifeguard": a digit 1-5, at most one punctuation mark, a single space,
# then the job text. string.punctuation is passed through re.escape so that
# "]", "\", "^" and "-" are taken literally inside the character class
# instead of being parsed as class syntax.
JOB_LINE = re.compile(r'^[1-5][' + re.escape(string.punctuation) + r']? (.+)')

# Entries that just repeat the hashtag prompt ("5 jobs i've had"); the "."
# stands in for whichever apostrophe character the author typed.
PROMPT = re.compile("jobs i.ve had")


def extract_jobs(text):
    """Yield every job listed in *text*, lowercased and whitespace-stripped.

    *text* is split on newlines; a line counts as a job entry only when it
    matches JOB_LINE. Entries that are empty after stripping, or that
    contain the prompt phrase itself, are skipped.
    """
    for line in text.split('\n'):
        m = JOB_LINE.match(line)
        if not m:
            continue
        # Strip so that trailing spaces or a stray "\r" do not turn the same
        # job into several distinct counter keys.
        job = m.group(1).strip().lower()
        if job and not PROMPT.search(job):
            yield job


def count_jobs(tweets):
    """Return a Counter of the jobs mentioned across *tweets*.

    Each tweet is a mapping with a 'full_text' entry. Tweets carrying a
    'retweeted_status' key are ignored so a retweeted list is not counted
    once per retweet.
    """
    jobs = collections.Counter()
    for tweet in tweets:
        if 'retweeted_status' in tweet:
            continue
        jobs.update(extract_jobs(tweet['full_text']))
    return jobs


def write_counts(jobs, path='jobs.csv'):
    """Write *jobs* to *path* as CSV rows of job,count, most common first."""
    # newline='' is what the csv module requires to avoid doubled line
    # endings on Windows; utf-8 keeps emoji and other non-ASCII job text
    # from failing under a narrower locale encoding. The with-block closes
    # (and therefore flushes) the file deterministically.
    with open(path, 'w', newline='', encoding='utf-8') as fh:
        output = csv.writer(fh)
        output.writerow(['job', 'count'])
        output.writerows(jobs.most_common())


def main():
    """Search Twitter for the 5jobsivehad hashtag and write jobs.csv."""
    twitter = twarc.Twarc()
    write_counts(count_jobs(twitter.search('5jobsivehad')))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment