Last active
November 4, 2019 13:20
-
-
Save seanmckaybeck/9a2151fcf23bf66844d0 to your computer and use it in GitHub Desktop.
Simple script for analyzing keywords about the Ferguson protests. Requires tweepy 2.3.0. Note that you need a Twitter API key. Store the credentials in a file called 'config', each key on its own line, in the following order: API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
A script for analyzing twitter stats on Ferguson | |
''' | |
import json | |
import re | |
import tweepy | |
def get_api():
    '''
    Build a tweepy OAuth handler from the credentials in the 'config' file.

    The file is read one line per credential, in this order: API key,
    API secret, access token, access token secret. Surrounding
    whitespace is stripped from each line.

    Returns the tweepy.OAuthHandler with the access token already set.
    '''
    with open('config') as config_file:
        # Order matters: each readline() consumes the next credential.
        credentials = [config_file.readline().strip() for _ in range(4)]
    consumer_key, consumer_secret, token, token_secret = credentials

    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(token, token_secret)
    return handler
class CustomStreamListener(tweepy.StreamListener):
    '''
    Sub class of StreamListener that tallies how often each
    keyword appears in the tweets delivered by the stream.

    Attributes:
        count: number of statuses received so far.
        common: set of stop words loaded from the 'common' file
            (one word per line); these are never counted.
        all_words: dict mapping each counted word to its frequency.
        pattern: regex matching every character that is not a word
            character, an apostrophe or '#'.
    '''
    # URLs and @mentions have to be removed as whole tokens *before*
    # punctuation is replaced by spaces; otherwise "http://t.co/abc"
    # breaks into "http", "t", "co", "abc" and the fragments (as well as
    # mention handles) end up being counted as ordinary words.
    noise_pattern = re.compile(r"https?://\S+|@\w+")

    def __init__(self, *args, **kwargs):
        '''
        Forward all arguments to the base listener, reset the counters
        and load the stop-word list from the 'common' file.
        '''
        super(CustomStreamListener, self).__init__(*args, **kwargs)
        self.count = 0
        with open('common') as f:
            self.common = set(line.strip() for line in f)
        self.all_words = {}
        # Raw string so that \w is a regex class, not a string escape.
        self.pattern = re.compile(r"[^\w'#]")

    def on_status(self, status):
        '''
        Handle one received status: bump the tweet counter and add the
        tweet's keywords to all_words.

        A word is counted when it is longer than two characters, is not
        purely digits, does not contain 'http' and is not a stop word.
        URLs and @mentions are discarded entirely.
        '''
        print('Got a tweet')
        self.count += 1
        tweet = status.text.lower()
        tweet = self.noise_pattern.sub(' ', tweet)
        tweet = self.pattern.sub(' ', tweet)
        # split() never yields empty or whitespace-only tokens, and
        # pattern has already removed every '@', so neither needs a check.
        for word in tweet.split():
            if len(word) <= 2 or word.isdigit():
                continue
            if 'http' in word or word in self.common:
                continue
            self.all_words[word] = self.all_words.get(word, 0) + 1
if __name__ == '__main__':
    # Stream tweets mentioning 'Ferguson' until the user presses Ctrl-C,
    # then report the tweet total and dump the word counts as JSON.
    #
    # Setup happens outside the try block so that a failure here (for
    # example a missing 'config' file) does not overwrite an existing
    # word_data.json with empty results.
    listener = CustomStreamListener()
    auth = get_api()
    streaming_api = tweepy.Stream(auth, listener)
    try:
        streaming_api.filter(track=['Ferguson'])
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the script.
        pass
    finally:
        # Runs for Ctrl-C *and* for any other way the stream ends, so a
        # dropped connection does not throw away the counts collected so
        # far. Any non-KeyboardInterrupt error still propagates afterwards.
        print('----TOTAL TWEETS----')
        print(listener.count)
        print('--------------------')
        json_data = json.dumps(listener.all_words, indent=4)
        with open('word_data.json', 'w') as f:
            f.write(json_data + '\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
the | |
be | |
to | |
of | |
and | |
a | |
in | |
that | |
have | |
I | |
it | |
for | |
not | |
on | |
with | |
he | |
as | |
you | |
do | |
at | |
this | |
but | |
his | |
by | |
from | |
they | |
we | |
say | |
her | |
she | |
or | |
an | |
will | |
my | |
one | |
all | |
would | |
there | |
their | |
what | |
so | |
up | |
out | |
if | |
about | |
who | |
get | |
which | |
go | |
me | |
when | |
make | |
can | |
like | |
time | |
no | |
just | |
him | |
know | |
take | |
person | |
into | |
year | |
your | |
good | |
some | |
could | |
them | |
see | |
other | |
than | |
then | |
now | |
look | |
only | |
come | |
its | |
over | |
think | |
also | |
back | |
after | |
use | |
two | |
how | |
our | |
work | |
first | |
well | |
way | |
even | |
new | |
want | |
because | |
any | |
these | |
give | |
day | |
most | |
us | |
i'll | |
i'm | |
until | |
ha | |
haha | |
hahaha | |
hahahaha | |
hahahahaha | |
hi | |
rt | |
re | |
omg | |
omgg | |
omggg | |
omgggg | |
omggggg | |
oh | |
ohh | |
ohhh | |
was | |
wtf | |
said | |
done | |
else | |
else's | |
le | |
such | |
via | |
que | |
let | |
still | |
real |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Convert the JSON data into a
large block of text for parsing
'''
import json


def build_word_block(data):
    '''
    Expand a word -> count mapping into one space-separated string.

    Each word is written count times, every occurrence followed by a
    single space (so non-empty output ends with a trailing space).
    Words whose count is zero or negative contribute nothing.
    Words appear in the mapping's iteration order.
    '''
    # One join instead of repeated += keeps this linear in output size.
    return ''.join((word + ' ') * count for word, count in data.items())


if __name__ == '__main__':
    # Read the counts saved by the streaming script and write the
    # expanded text block next to it.
    with open('word_data.json') as f:
        data = json.load(f)
    with open('word_block.txt', 'w') as f:
        f.write(build_word_block(data))
@Zulko that is pretty!
How would one stop this script once it is running? Or do you just have to let it run its course?
@adammworden Currently just a well-placed Ctrl-C.
When you hit Ctrl-C it will save all the data to a JSON file.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In case you are interested I wrote an API-independent Twitter listener (it doesn't require an account).
Edit : my version :)