Created
November 9, 2016 09:32
-
-
Save chmodsss/537cefa03a83ee4a1beb7cd581a71f40 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Grabbing current tweets using Python's tweepy module.
'''
from tweepy import Stream | |
from tweepy import OAuthHandler | |
from tweepy.streaming import StreamListener | |
import logging | |
# Get the Twitter API keys from https://dev.twitter.com/ and fill them in below.
# The values here are redacted placeholders, not working credentials; they are
# kept as strings so the module still parses.
ckey = '***'     # consumer key, passed to OAuthHandler
csecret = '***'  # consumer secret, passed to OAuthHandler
atoken = '***'   # access token, passed to set_access_token
asecret = '***'  # access token secret, passed to set_access_token
logging.basicConfig(level=logging.INFO)

# A word of a tweet is dropped when it starts with one of these prefixes:
#   http   : url
#   @      : persons
#   RT     : retweet marker
#   \nhttp : url directly after a newline
exclude_terms = ('http', '@', 'RT', '\nhttp')

# Retweets should have the whole tweet removed, not just the 'RT' word.
# The trailing comma is required: ('RT') is the plain string 'RT', whereas
# ('RT',) is a one-element tuple like exclude_terms.
# NOTE(review): this constant is not referenced in the visible part of the
# script, so whole-tweet removal is not actually applied yet.
exclude_sentences = ('RT',)
# Terms handed to the stream filter as its `track` argument.
# Download the 100 or 1000 most frequently used words of the language and
# save them as topwords.txt; every non-blank line becomes one term.
track_words = []
with open('topwords.txt', 'rb') as f:
    for raw_line in f:
        word = raw_line.strip()
        # Skip blank lines so that no empty term ends up in the track list.
        if not word:
            continue
        # The file is read as bytes; 'unicode-escape' turns escape sequences
        # written in the file (e.g. \u00e9) into the characters they denote.
        track_words.append(word.decode('unicode-escape'))
class listener(StreamListener):
    """Stream listener that appends hashtag-bearing tweets to ``tweets.txt``."""

    def on_data(self, data):
        """Extract the tweet text from a raw stream message and save it.

        The text is cut out of the raw message between the ``,"text":"`` and
        ``","source"`` markers. A tweet is kept only if at least one of its
        whitespace-separated words starts with ``#``. Words starting with any
        prefix in ``exclude_terms`` are dropped; the remaining words are
        joined with single spaces, lower-cased and appended as one line to
        ``tweets.txt``.

        Args:
            data: Raw message string received from the stream (presumably the
                JSON text of a tweet -- the markers above are looked up in it
                verbatim, without JSON decoding).

        Returns:
            True on every path, including after a handled failure.
            NOTE(review): tweepy 3.x is understood to stop the stream only
            when a callback returns False -- confirm against the installed
            version.
        """
        try:
            raw_tweet = data.split(',"text":"')[1].split('","source"')[0]
            tokens = raw_tweet.split()
            if any(token.startswith("#") for token in tokens):
                kept = [
                    token for token in tokens
                    if not token.startswith(exclude_terms)
                ]
                # The context manager closes the file even if a write fails.
                with open("tweets.txt", "a") as save_file:
                    save_file.write(' '.join(kept).lower())
                    save_file.write('\n')
                print("...")
        except Exception as e:
            # Messages without the expected markers (IndexError) and any
            # other processing failure are reported and skipped so that one
            # bad message does not end the run. Catching Exception rather
            # than BaseException lets KeyboardInterrupt and SystemExit
            # propagate, so the script can still be stopped with Ctrl-C.
            print("failed data", str(e))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream.

        Args:
            status: Error status passed in by the stream.
        """
        print(status)
# Build the OAuth handler from the consumer key/secret, then attach the
# access token/secret.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

# Start streaming with a fresh `listener` instance, filtering on the track
# words and restricting the languages to "en".
twitter_stream = Stream(auth, listener())
twitter_stream.filter(track=track_words, languages=["en"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment