Skip to content

Instantly share code, notes, and snippets.

@andylolz
Created February 4, 2015 09:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andylolz/625f479dd5bed9ce2d08 to your computer and use it in GitHub Desktop.
Save andylolz/625f479dd5bed9ce2d08 to your computer and use it in GitHub Desktop.
Incomplete — a start at a Twitter list scraper for UKIP candidates.
#!/usr/bin/env python
import requests
import twitter
from common import get_empty_json_directory, write_ppc_json

# The Twitter account and list that hold the UKIP candidate accounts.
twitter_handle = 'Vote_UKIP'
list_name = 'ukip-ppcs-3'

# Twitter API credentials (redacted placeholders in this gist).
consumer_key = '#####'
consumer_secret = '#####'
access_token = '#####'
access_token_secret = '#####'

# MapIt endpoint listing every Westminster constituency (WMC area type).
mapit_constituencies = 'http://mapit.mysociety.org/areas/WMC'

api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

# Every member of the Twitter list -- one account per candidate.
candidates = api.GetListMembers(None, list_name, owner_screen_name=twitter_handle)

# MapIt returns a mapping of {area_id: area_dict}; invert it so we can
# look an area id up by constituency name.
mapit_response = requests.get(mapit_constituencies).json()
constituencies = {area['name']: area_id for area_id, area in mapit_response.items()}

# Fresh output directory for the per-candidate JSON files.
json_directory = get_empty_json_directory('ukip')
for candidate in candidates:
if candidate.location in constituencies:
print '%s is standing in %s (%s) - @%s' % (candidate.name, candidate.location, constituencies[candidate.location], candidate.screen_name)
# else:
# print '%s is standing somewhere like %s' % (candidate.name, candidate.location)
# NOTE(review): incomplete fragment -- `row` is never defined in this file,
# `re` is never imported, and `get_person` is neither defined nor imported.
# It looks like the body of a per-row loop over a BeautifulSoup table
# (row.find / row.find_all), pasted here without its enclosing `for`;
# `continue` has no loop as written. Left byte-identical pending the
# missing context -- TODO confirm where this belongs.
person_link = row.find('a')
# Skip rows with no candidate link (e.g. header/empty rows) -- but note
# `continue` is only valid once this sits back inside its loop.
if not person_link:
continue
person_url = person_link['href']
print "person_url:", person_url
# Extract the trailing path segment before ".aspx" as a person slug,
# e.g. ".../john-smith.aspx" -> "john-smith". (Requires `import re`.)
m = re.search(r'/([^/]+)\.aspx$', person_url)
if not m:
# Python 2 raise syntax; fails loudly on an unexpected URL shape.
raise Exception, "Couldn't parse {0}".format(person_url)
person_slug = m.group(1)
# Presumably the second table cell holds the constituency name -- verify
# against the page this was scraping.
cells = row.find_all('td')
constituency = cells[1].text
data = get_person(
person_url,
person_slug,
constituency,
)
write_ppc_json(data, constituency, json_directory)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment