Created
December 7, 2019 15:04
-
-
Save scarnecchia/a8fb065d2c69b59d6e8c6ba7c337d0da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import json
import time
from http.client import IncompleteRead

import tweepy
from pymongo import MongoClient
# --- Configuration -----------------------------------------------------------

# MongoDB connection URI. The value below is a placeholder and must be replaced
# with a real URI before running (for a local server this is typically
# something like 'mongodb://localhost:27017/').
MONGO_HOST = 'mongodb://path/to/mongodb'

# Keywords passed to the streaming filter's `track` parameter.
WORDS = ['drone', 'drones']

# Twitter API credentials. Left empty on purpose; fill them in before running.
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""
class StreamListener(tweepy.StreamListener):
    """Stream listener that stores every received tweet in MongoDB.

    Subclasses the listener class provided by tweepy for the Twitter
    Streaming API. Each raw message is decoded from JSON and, if it carries a
    'created_at' field, inserted into the ``crashmas_search`` collection of
    the ``twitterdb`` database.
    """

    # One MongoClient shared by every listener instance. It is created lazily
    # on the first message instead of once per message: opening a new client
    # for each tweet leaks connections and is slow.
    _client = None

    def on_connect(self):
        """Report that the connection to the Streaming API is established."""
        print("You are now connected to the streaming API.")

    def on_error(self, status_code):
        """Print the error status code and ask tweepy to stop the stream.

        Returns:
            False, which per tweepy's listener contract ends the stream.
        """
        print('An Error has occured: ' + repr(status_code))
        return False

    def _get_collection(self):
        """Return the target collection, creating the shared client if needed.

        MongoDB creates the ``twitterdb`` database and the
        ``crashmas_search`` collection on first insert if they do not exist.
        """
        cls = type(self)
        if cls._client is None:
            cls._client = MongoClient(MONGO_HOST)
        return cls._client.twitterdb.crashmas_search

    def on_data(self, data):
        """Decode one raw stream message and store it in MongoDB.

        Args:
            data: Raw JSON text of a single message from the stream.

        Messages without a 'created_at' field (for example stall warnings,
        which the caller may enable) are not tweets and are skipped. Any
        failure is printed rather than raised so that one bad message does
        not terminate the stream.
        """
        try:
            # Decode the JSON from Twitter.
            datajson = json.loads(data)

            if 'created_at' not in datajson:
                print("Skipping non-tweet message: " + str(data))
                return

            # Tell the user a tweet was collected, using its timestamp.
            print("Tweet collected at " + str(datajson['created_at']))

            # insert_one replaces the deprecated Collection.insert.
            self._get_collection().insert_one(datajson)
        except Exception as e:
            # Deliberate best-effort boundary: report and keep streaming.
            print(e)
# Authenticate against the Twitter API with the credentials configured above.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Pause between reconnect attempts so that a stream which keeps failing
# (bad credentials, rate limiting, dropped connections) does not turn the
# loop below into a tight reconnect loop.
RECONNECT_DELAY_SECONDS = 5

# Bound before the loop so the KeyboardInterrupt handler can always refer to
# it, even if the interrupt arrives before the first stream is created.
streamer = None

try:
    while True:
        try:
            # Set up the listener. 'wait_on_rate_limit=True' is kept from the
            # original to help with Twitter API rate limiting.
            listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
            streamer = tweepy.Stream(auth=auth, listener=listener)
            print("Tracking: " + str(WORDS))
            streamer.filter(track=WORDS, stall_warnings=True)
        except IncompleteRead:
            # The connection was cut mid-response; fall through and reconnect.
            pass
        # Reached after an IncompleteRead or when filter() returns (e.g. the
        # listener's on_error stopped the stream).
        time.sleep(RECONNECT_DELAY_SECONDS)
except KeyboardInterrupt:
    # Ctrl-C: close the current stream, if any, and exit.
    if streamer is not None:
        streamer.disconnect()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment