Skip to content

@ThaWeatherman /analyze.py
Last active

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Simple script for analyzing keywords about the Ferguson protests. Requires tweepy 2.3.0. Note that you need Twitter API credentials: store them in a file called 'config', each key on its own line, in the following order: API_KEY, API_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET
'''
A script for analyzing twitter stats on Ferguson
'''
import json
import re
import tweepy
def get_api():
    '''
    Build a tweepy OAuth handler from the credentials stored in 'config'.

    The first four lines of the file are read in order as the API key,
    API secret, access token and access token secret; surrounding
    whitespace is stripped from each.

    Returns the tweepy.OAuthHandler with the access token already set.
    '''
    with open('config') as config_file:
        credentials = [config_file.readline().strip() for _ in range(4)]
    consumer_key, consumer_secret, token, token_secret = credentials

    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(token, token_secret)
    return handler
class CustomStreamListener(tweepy.StreamListener):
    '''
    Stream listener that tallies keyword frequencies in incoming tweets.

    Attributes:
        count: number of statuses received so far.
        common: set of lower-cased words, loaded from the 'common' file
            (one word per line), that are excluded from the tally.
        all_words: dict mapping each counted word to its occurrence count.
        pattern: compiled regex matching every character that is not a
            word character, an apostrophe or '#'.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Reset the counters and load the excluded-word list.

        All arguments are forwarded unchanged to tweepy.StreamListener.
        '''
        super(CustomStreamListener, self).__init__(*args, **kwargs)
        self.count = 0
        with open('common') as f:
            # Tweets are lower-cased before the lookup, so normalise the
            # excluded words the same way in case the file has capitals.
            self.common = set(line.strip().lower() for line in f)
        self.all_words = {}
        # Raw string: \w must reach the regex engine untouched instead of
        # being treated as a (deprecated) string escape.
        self.pattern = re.compile(r"[^\w'#]")

    def on_status(self, status):
        '''
        Count the keywords of one received status.

        The text is lower-cased and split on whitespace. Tokens that
        contain 'http' or '@' are skipped entirely; the rest have their
        punctuation replaced by spaces and are split again. A resulting
        word is tallied in all_words when it is longer than two
        characters, is not purely digits and is not in the excluded set.
        '''
        print('Got a tweet')
        self.count += 1
        for token in status.text.lower().split():
            # Links and mentions must be dropped while the token is still
            # intact: the substitution below removes '@', ':', '/' and
            # '.', after which they can no longer be recognised and their
            # fragments would be counted as ordinary words.
            if 'http' in token or '@' in token:
                continue
            for word in self.pattern.sub(' ', token).split():
                if len(word) <= 2 or word.isdigit() or word in self.common:
                    continue
                self.all_words[word] = self.all_words.get(word, 0) + 1
if __name__ == '__main__':
    listener = CustomStreamListener()
    try:
        # Blocks while streaming statuses that match the tracked keyword.
        stream = tweepy.Stream(get_api(), listener)
        stream.filter(track=['Ferguson'])
    except KeyboardInterrupt:
        # Ctrl-C is how the run is stopped: report the tweet total and
        # dump the word tally to disk.
        for line in ('----TOTAL TWEETS----',
                     listener.count,
                     '--------------------'):
            print(line)
        payload = json.dumps(listener.all_words, indent=4)
        with open('word_data.json', 'w') as out:
            out.write(payload + '\n')
the
be
to
of
and
a
in
that
have
I
it
for
not
on
with
he
as
you
do
at
this
but
his
by
from
they
we
say
her
she
or
an
will
my
one
all
would
there
their
what
so
up
out
if
about
who
get
which
go
me
when
make
can
like
time
no
just
him
know
take
person
into
year
your
good
some
could
them
see
other
than
then
now
look
only
come
its
over
think
also
back
after
use
two
how
our
work
first
well
way
even
new
want
because
any
these
give
day
most
us
i'll
i'm
until
ha
haha
hahaha
hahahaha
hahahahaha
hi
rt
re
omg
omgg
omggg
omgggg
omggggg
oh
ohh
ohhh
was
wtf
said
done
else
else's
le
such
via
que
let
still
real
'''
Convert the JSON data into a
large block of text for parsing
'''
import json
# Load the word -> count mapping; the context manager closes the file even
# if the JSON cannot be parsed.
with open('word_data.json') as f:
    data = json.load(f)

# Emit every word, followed by a single space, once per recorded
# occurrence. Counts of zero or less contribute nothing.
# NOTE(review): assumes the counts are integers - confirm against the
# producer of word_data.json.
final_str = ''.join((word + ' ') * count for word, count in data.items())

with open('word_block.txt', 'w') as f:
    f.write(final_str)
@Zulko

In case you are interested I wrote an API-independent Twitter listener (it doesn't require an account).

Edit : my version :)

@ThaWeatherman

@Zulko that is pretty!

@adammworden

How would one stop this script once it is running? Or would you have to just allow it to run its course?

@ThaWeatherman

@adammworden Currently just a well placed Ctrl-C. When you hit Ctrl-C it will save all the data to a JSON file

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.