Skip to content

Instantly share code, notes, and snippets.

@om-henners
Last active August 29, 2015 14:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save om-henners/fe173f5d1c2dd667b3d4 to your computer and use it in GitHub Desktop.
Save om-henners/fe173f5d1c2dd667b3d4 to your computer and use it in GitHub Desktop.
Tweepy streaming to Fiona
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Stream twitter data directly to a dataset with Fiona and Tweepy.
"""
import fiona
import fiona.crs
from tweepy.streaming import StreamListener
class FionaStreaming(StreamListener):
    """
    Stream Listener subclass that wraps a Fiona dataset to stream individual
    tweets with coordinates to a spatial dataset.

    Statuses without coordinates are skipped. For retweets the screen name,
    display name and creation time of the original tweet are stored in the
    ``orig_*`` fields; for all other tweets those fields are left empty.

    The listener can be used as a context manager, in which case the dataset
    is closed on exit; otherwise call :meth:`close` explicitly.

    Examples
    --------
    >>> auth = tweepy.OAuthHandler(client_key, client_secret_key)
    >>> auth.set_access_token(access_key, access_key_secret)
    >>> api = tweepy.API(auth)
    >>> l = FionaStreaming("melbourne_tweets.shp")
    >>> stream = tweepy.Stream(auth, l)
    >>> stream.filter(locations=(144.463056,-38.313611,145.463056,-37.313611))
    >>> l.close()
    """

    def __init__(self, out_path, mode="w", driver='ESRI Shapefile', api=None):
        """
        Create the dataset object and the stream listener.

        :param out_path: Path to write to on disk
        :type out_path: str
        :param mode: File open mode. Should be either "w" or "a"
        :type mode: str
        :param driver: Fiona (OGR) driver to write the dataset
        :type driver: str
        :param api: tweepy API (optional - will be created automatically if not
                    provided)
        :type api: tweepy.API
        """
        super(FionaStreaming, self).__init__(api)
        schema = {
            "geometry": "Point",
            "properties": {
                "id": "str",
                "username": "str",
                "name": "str",
                "text": "str:144",
                "created_at": "datetime",
                "orig_username": "str",
                "orig_name": "str",
                "orig_created_at": "datetime"
            }
        }
        # Tweet coordinates are written as WGS84 longitude/latitude.
        crs = fiona.crs.from_epsg(4326)
        self.dest = fiona.open(
            out_path, mode, driver=driver, crs=crs, schema=schema
        )

    def __enter__(self):
        """
        Allow ``with FionaStreaming(...) as listener:`` usage.

        :return: This listener
        :rtype: FionaStreaming
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Close the dataset when leaving a ``with`` block. Exceptions raised
        inside the block are not suppressed.

        :rtype: None
        """
        self.close()

    def on_status(self, status):
        """
        When a new status comes in, write it to the output dataset if it has
        coordinates available.

        :param status: The twitter status object from tweepy
        :type status: tweepy.Status
        :return: Whether to keep the stream open (always ``True``)
        :rtype: bool
        """
        if not status.coordinates:
            return True

        properties = {
            "id": status.id_str,
            "username": status.user.screen_name,
            "name": status.user.name,
            "text": status.text,
            "created_at": status.created_at.isoformat(),
            "orig_username": None,
            "orig_name": None,
            "orig_created_at": None
        }

        # A retweet is identified by the presence of ``retweeted_status``.
        # The ``retweeted`` flag only says whether the authenticating user
        # has retweeted this tweet, so it must not be used for this check.
        original = getattr(status, "retweeted_status", None)
        if original is not None:
            properties.update({
                "orig_username": original.user.screen_name,
                "orig_name": original.user.name,
                # Creation time of the original tweet, not of the author's
                # account (``original.user.created_at``).
                "orig_created_at": original.created_at.isoformat()
            })

        feature = {
            # ``status.coordinates`` is used directly as the feature geometry.
            "geometry": status.coordinates,
            "properties": properties,
            "id": "-1"
        }
        self.dest.write(feature)
        # Flush after every feature rather than batching writes.
        self.dest.flush()
        return True

    def close(self):
        """
        Make sure to close the dataset when you've finished streaming to the
        file

        :rtype: None
        """
        self.dest.close()
# Module metadata: author handle, license name and version string.
__author__ = "om_henners"
__license__ = "Apache 2.0"
__version__ = "0.0.1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment