Skip to content

Instantly share code, notes, and snippets.

@chmodsss
Created November 9, 2016 09:32
Show Gist options
  • Save chmodsss/537cefa03a83ee4a1beb7cd581a71f40 to your computer and use it in GitHub Desktop.
Save chmodsss/537cefa03a83ee4a1beb7cd581a71f40 to your computer and use it in GitHub Desktop.
'''
Grabbing current tweets using Python's tweepy module
'''
import json
import logging

from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Twitter API credentials: get your own keys from https://dev.twitter.com/
# NOTE: `***` is a placeholder, not valid Python -- replace each one with a
# quoted string before running the script.
# Consumer key/secret pair, handed to OAuthHandler(...).
ckey = ***
csecret = ***
# Access token/secret pair, handed to auth.set_access_token(...).
atoken = ***
asecret = ***
logging.basicConfig(level=logging.INFO)

# Words of a tweet that start with any of these prefixes are dropped before
# the tweet is saved:
#   http   : URLs
#   @      : user mentions
#   RT     : the retweet marker
#   \nhttp : URLs preceded by a line break
exclude_terms = ('http', '@', 'RT', '\nhttp')

# Prefixes marking tweets that should be removed as a whole (retweets).
# The trailing comma matters: ('RT') is just the string 'RT', while ('RT',)
# is a one-element tuple like exclude_terms above.
# TODO: whole-tweet removal is not wired up in the visible part of the script.
exclude_sentences = ('RT',)
# Terms to track on the stream, one per line of topwords.txt. Download the
# 100 or 1000 most frequently used words of the language and save them under
# that name next to this script.
# NOTE(review): the streaming API caps how many track terms one connection
# may use -- confirm the limit before relying on a 1000-word list.
track_words = []
with open('topwords.txt', 'rb') as f:
    for raw_line in f:
        # Lines are read as bytes so that escape sequences written in the
        # file (e.g. \u00e9) are expanded by the unicode-escape codec.
        word = raw_line.strip().decode('unicode-escape')
        # Skip blank lines so no empty term ends up in the track filter.
        if word:
            track_words.append(word)
class listener(StreamListener):
    """Stream listener that appends hashtag-bearing tweets to ``tweets.txt``.

    For every message whose tweet text contains at least one word starting
    with ``#``, the words that do not start with any prefix listed in the
    module-level ``exclude_terms`` are joined with single spaces, lower-cased
    and appended as one line to ``tweets.txt``.
    """

    def on_data(self, data):
        """Handle one raw message received from the stream.

        Args:
            data: The raw JSON payload of a single stream message.

        Returns:
            Always ``True``. Failures are reported on stdout and otherwise
            ignored, so one bad message never interrupts the collection
            (tweepy's stream loop is documented to stop only on ``False``).
        """
        try:
            # Parse the payload instead of slicing the raw JSON string:
            # slicing leaves escapes such as \n and \uXXXX in the text, which
            # hides hashtags and URLs that directly follow a line break.
            tweet_text = json.loads(data)["text"]
            tokens = tweet_text.split()
            if any(token.startswith("#") for token in tokens):
                kept = [t for t in tokens if not t.startswith(exclude_terms)]
                # The context manager closes the file even if a write fails.
                # UTF-8 is explicit because the decoded text may be non-ASCII.
                with open("tweets.txt", "a", encoding="utf-8") as save_file:
                    save_file.write(" ".join(kept).lower())
                    save_file.write("\n")
                print("...")
        except Exception as e:
            # Deliberately best-effort: messages without a "text" field,
            # malformed JSON, or I/O errors are reported and skipped.
            # Exception (not BaseException) lets Ctrl-C / SystemExit through.
            print("failed data", str(e))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Build the OAuth handler from the consumer pair, then attach the access pair.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

# Start streaming: tweets in language "en" that match any of the track words
# are delivered to a fresh listener instance.
twitter_stream = Stream(auth, listener())
twitter_stream.filter(
    track=track_words,
    languages=["en"],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment