Skip to content

Instantly share code, notes, and snippets.

@bycoffe
Created September 5, 2009 04:32
Show Gist options
  • Save bycoffe/181287 to your computer and use it in GitHub Desktop.
Save bycoffe/181287 to your computer and use it in GitHub Desktop.
"""
A small Django app I built last year that looked for people on Twitter saying
they were sick. It serves as an example of using Twitter search.
"""
# models.py
from django.contrib.gis.db import models
class Phrase(models.Model):
    """A phrase that get_tweets.py searches Twitter for."""

    # The phrase text (at most 100 characters); phrase_urls() wraps it in
    # double quotes before building the search URL.
    phrase = models.CharField(max_length=100)

    def __unicode__(self):
        """Return the phrase text as the object's unicode representation."""
        return self.phrase
class TwitterUser(models.Model):
    """A Twitter account, created by get_author() the first time it is seen."""

    # Display name scraped from the profile page ('' when not found).
    name = models.CharField(max_length=100)
    # Last path segment of the author's profile URL; unique per user.
    username = models.CharField(max_length=100, unique=True)
    # Free-text location scraped from the profile page; may be blank.
    location = models.CharField(max_length=100, blank=True)
    # Geocoded location. The column allows NULL, although get_pt() stores
    # Point(0, 0) rather than None when the location cannot be resolved.
    pt = models.PointField(null=True)

    def __unicode__(self):
        """Return the username as the object's unicode representation."""
        return self.username
class Tweet(models.Model):
    """A single tweet that matched one of the stored phrases."""

    # Twitter's id for the tweet, taken from the last path segment of the
    # feed entry's link; unique so a tweet is stored only once.
    # NOTE(review): IntegerField is commonly a 32-bit column and tweet ids
    # may exceed that range -- confirm against the database backend in use.
    twitter_id = models.IntegerField(unique=True)
    # Tweet text after fix_tags() has been applied.
    tweet = models.CharField(max_length=255)
    # The search phrase whose results contained this tweet.
    # NOTE(review): no on_delete argument is given on either ForeignKey;
    # newer Django releases require one -- confirm the targeted version.
    phrase = models.ForeignKey(Phrase)
    # The author of the tweet.
    user = models.ForeignKey(TwitterUser)
    # Publication timestamp parsed from the feed entry.
    published = models.DateTimeField()
    # Defaults to True; nothing in this listing ever sets it to False.
    is_active = models.BooleanField(default=True)

    def __unicode__(self):
        """Return the tweet text as the object's unicode representation."""
        return self.tweet
# get_tweets.py
import datetime
import os
import sys
import urllib
import urllib2
from django.core.management import setup_environ
from django.contrib.gis.geos import *
from django.http import HttpRequest
from BeautifulSoup import BeautifulSoup
from dateutil.parser import parse as dateparse
from geopy import geocoders
# Twitter search Atom feed; "%s" is replaced with the URL-encoded query string
# built in phrase_urls().
URL = "http://search.twitter.com/search.atom?%s"
# Shared geopy Google geocoder used by get_pt(). It is constructed with an
# empty string (NOTE(review): presumably the API key -- confirm).
g = geocoders.Google('')
def phrase_urls():
    """Yield a (Phrase, search URL) tuple for every stored Phrase.

    Each phrase is wrapped in double quotes, URL-encoded as the ``q``
    parameter and substituted into the module-level URL template.
    """
    # Imported inside the function rather than at module level; presumably
    # because the models need the Django settings that _main() configures.
    from sickontwitter.models import Phrase
    for phrase in Phrase.objects.all():
        quoted = '"%s"' % phrase.phrase
        # Python 2's urllib.urlencode() calls str() on each value, which
        # raises UnicodeEncodeError for non-ASCII unicode text, so encode
        # unicode phrases to UTF-8 bytes first.
        if isinstance(quoted, unicode):
            quoted = quoted.encode('utf-8')
        qs = urllib.urlencode({'q': quoted})
        yield (phrase, URL % qs)
def url_to_soup(url):
    """Fetch url and return its body parsed as a BeautifulSoup document.

    Any error raised while opening or reading the URL propagates to the
    caller; the response handle is closed in either case.
    """
    page = urllib2.urlopen(url)
    try:
        markup = page.read()
    finally:
        # Close the response even when read() fails part-way through, so
        # the underlying connection is not leaked.
        page.close()
    return BeautifulSoup(markup)
def parse_xml(soup, phrase):
    """Process every 'entry' element of a parsed search feed.

    Each entry found in soup is passed to parse_entry() together with the
    phrase that produced the feed.
    """
    for feed_entry in soup.findAll('entry'):
        parse_entry(feed_entry, phrase)
def fix_tags(string):
    """Strip bold tags, then unescape the &lt;, &gt; and &amp; entities.

    The replacements are applied one after another in the order listed
    below; '&amp;' is handled last, so a double-escaped entity such as
    '&amp;lt;' is unescaped by one level only.
    """
    replacements = (
        ('<b>', ''),
        ('</b>', ''),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&amp;', '&'),
    )
    for old, new in replacements:
        string = string.replace(old, new)
    return string
def parse_entry(entry, phrase):
    """Store one feed entry as a Tweet unless its id is already known.

    The publication date, link, content and author URI are read from the
    entry. The tweet id is the last path segment of the link. A newly
    created Tweet is printed; an existing one is left untouched.
    """
    from sickontwitter.models import Tweet
    published = dateparse(entry.find('published').contents[0])
    link_href = entry.find('link')['href']
    message = fix_tags(entry.find('content').contents[0])
    author = get_author(entry.find('author').find('uri').contents[0])
    # Values used only when the tweet does not exist yet.
    new_fields = {
        'tweet': message,
        'phrase': phrase,
        'user': author,
        'published': published,
    }
    tweet, created = Tweet.objects.get_or_create(
        twitter_id=link_href.split('/')[-1],
        defaults=new_fields)
    if not created:
        return
    print(tweet)
def get_author(url):
    """Return the TwitterUser for a profile URL, creating it when unknown.

    The username is the last path segment of url. If no user with that
    username is stored, the profile page is fetched and parsed, its
    location is geocoded and a new TwitterUser is created and returned.
    """
    from sickontwitter.models import TwitterUser
    username = unicode(url.split('/')[-1])
    try:
        known_user = TwitterUser.objects.get(username=username)
    except TwitterUser.DoesNotExist:
        # Not stored yet: fall through and build the user from the page.
        pass
    else:
        return known_user
    name, location = parse_author_page(url_to_soup(url))
    point = get_pt(location)
    return TwitterUser.objects.create(
        name=name,
        username=username,
        location=location,
        pt=point)
def get_pt(location):
    """Geocode location and return it as Point(lng, lat).

    Point(0, 0) is returned when location is empty or when the geocoding
    step raises ValueError (which also covers a result that cannot be
    unpacked into a place and a (lat, lng) pair).
    """
    if not location:
        return Point(0, 0)
    try:
        _place, (lat, lng) = g.geocode(location)
    except ValueError:
        return Point(0, 0)
    # Point takes x (longitude) first, then y (latitude).
    return Point(float(lng), float(lat))
def _first_span_content(soup, css_class):
    """Return the first child of the first span with css_class, or ''."""
    try:
        return soup.find('span', {'class': css_class}).contents[0]
    except (AttributeError, IndexError):
        # AttributeError: find() returned None because no such span exists.
        # IndexError: the span exists but has no children (it is empty).
        return ''


def parse_author_page(soup):
    """Extract the display name and location from a parsed profile page.

    The name is read from the first span with class 'fn' and the location
    from the first span with class 'adr'. Returns a (name, location) tuple
    of unicode strings; a value is u'' when its span is missing or empty.
    """
    name = _first_span_content(soup, 'fn')
    location = _first_span_content(soup, 'adr')
    return (unicode(name), unicode(location))
def _main():
    """Set up Django, then fetch and store tweets for every phrase.

    The current working directory is appended to sys.path so that a
    ``settings`` module located there can be imported and handed to
    setup_environ() before any model is used.
    """
    # Django setup
    sys.path.append(os.getcwd())
    import settings
    setup_environ(settings)

    for phrase, search_url in phrase_urls():
        parse_xml(url_to_soup(search_url), phrase)


if __name__ == '__main__':
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment