Skip to content

Instantly share code, notes, and snippets.

@scarnecchia
Created December 7, 2019 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scarnecchia/a8fb065d2c69b59d6e8c6ba7c337d0da to your computer and use it in GitHub Desktop.
Save scarnecchia/a8fb065d2c69b59d6e8c6ba7c337d0da to your computer and use it in GitHub Desktop.
from __future__ import print_function
import tweepy
import json
from pymongo import MongoClient
from http.client import IncompleteRead
# --- Script configuration -------------------------------------------------
# MongoDB connection URI handed to MongoClient. The value below is a
# placeholder and must be replaced with the URI of your own server.
MONGO_HOST= 'mongodb://path/to/mongodb' # assuming you have mongoDB installed locally
# Keywords passed as the `track` filter of the Twitter stream.
WORDS = ['drone', 'drones']
# Twitter API credentials used to build the OAuth handler. They are left
# blank here and must be filled in before the script can authenticate.
CONSUMER_KEY = ""
CONSUMER_SECRET = ""
ACCESS_TOKEN = ""
ACCESS_TOKEN_SECRET = ""
class StreamListener(tweepy.StreamListener):
    """Tweepy stream listener that stores every received tweet in MongoDB.

    Tweets are written to the ``crashmas_search`` collection of the
    ``twitterdb`` database on the server addressed by ``MONGO_HOST``.
    MongoDB creates the database and the collection on first insert if
    they do not exist yet.
    """

    # One MongoClient shared by all listener instances, created lazily on the
    # first message. The reconnect loop builds a new listener per connection,
    # so caching on the class avoids opening a new client per tweet or per
    # reconnect.
    _mongo_client = None

    def _get_collection(self):
        """Return the target collection, creating the shared client if needed."""
        cls = type(self)
        if cls._mongo_client is None:
            cls._mongo_client = MongoClient(MONGO_HOST)
        return cls._mongo_client.twitterdb.crashmas_search

    def on_connect(self):
        """Report that the connection to the Streaming API is established."""
        print("You are now connected to the streaming API.")

    def on_error(self, status_code):
        """Print the error status code and stop the stream.

        Returns:
            False, which tells tweepy to disconnect the stream.
        """
        print('An Error has occured: ' + repr(status_code))
        return False

    def on_data(self, data):
        """Decode one raw stream message and store it if it is a tweet.

        Args:
            data: Raw JSON text of a single message from the stream.

        Failures are printed rather than raised, so a single bad message or
        a transient database error does not tear down the stream.
        """
        # Decode the JSON from Twitter.
        try:
            datajson = json.loads(data)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(e)
            return

        # The stream also carries non-tweet messages (e.g. stall warnings,
        # delete and limit notices) that have no 'created_at' field. Skip
        # them explicitly instead of failing with a KeyError.
        if not isinstance(datajson, dict) or 'created_at' not in datajson:
            print("Skipping non-tweet stream message")
            return

        try:
            # Tell the user a tweet was collected, using its creation time.
            print("Tweet collected at " + str(datajson['created_at']))
            # insert_one replaces Collection.insert, which was deprecated in
            # PyMongo 3 and removed in PyMongo 4.
            self._get_collection().insert_one(datajson)
        except Exception as e:
            # Best-effort boundary: report the problem and keep streaming.
            print(e)
# Authenticate against the Twitter API with the credentials configured above.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# Reconnect loop: an IncompleteRead drops the streaming connection, so the
# listener and stream are rebuilt and the filter restarted until the user
# interrupts the script with Ctrl-C.
while True:
    # Reset on every iteration so the KeyboardInterrupt handler never touches
    # a stream from a previous iteration or an unbound name.
    streamer = None
    try:
        # Set up the listener. 'wait_on_rate_limit=True' makes the API object
        # wait when Twitter's rate limit is hit.
        listener = StreamListener(api=tweepy.API(wait_on_rate_limit=True))
        streamer = tweepy.Stream(auth=auth, listener=listener)
        print("Tracking: " + str(WORDS))
        streamer.filter(track=WORDS, stall_warnings=True)
    except IncompleteRead:
        # Connection was cut mid-read; start over with a fresh stream.
        continue
    except KeyboardInterrupt:
        # The original called `stream.disconnect()` on an undefined name,
        # which raised NameError instead of shutting down cleanly.
        if streamer is not None:
            streamer.disconnect()
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment