Skip to content

Instantly share code, notes, and snippets.

@chadmhorner
Created March 28, 2019 17:45
Show Gist options
  • Save chadmhorner/312ff5ec6c7bcfb23559b99648dba5ea to your computer and use it in GitHub Desktop.
Save chadmhorner/312ff5ec6c7bcfb23559b99648dba5ea to your computer and use it in GitHub Desktop.
from readypipe import requests, starting_task, subtask, schedule, save, get_attempts
# Maps each candidate's display name to their Twitter profile URL.
# Every URL shares the same prefix, so only the account handle is spelled
# out per candidate; insertion order matches the listing order below.
URLS = {
    name: 'https://twitter.com/' + handle
    for name, handle in (
        ('Andrew Yang', 'andrewyang'),
        ('Bernie Sanders', 'BernieSanders'),
        ('Elizabeth Warren', 'ewarren'),
        ('Cory Booker', 'CoryBooker'),
        ('Kamala Harris', 'KamalaHarris'),
        ('Pete Buttigieg', 'PeteButtigieg'),
        ('Julian Castro', 'JulianCastro'),
        ('John Delaney', 'JohnDelaney'),
        ('Tulsi Gabbard', 'TulsiGabbard'),
        ('Kirsten Gillibrand', 'SenGillibrand'),
        ('John Hickenlooper', 'Hickenlooper'),
        ('Jay Inslee', 'JayInslee'),
        ('Amy Klobuchar', 'amyklobuchar'),
        ('Marianne Williamson', 'marwilliamson'),
        ('Stacey Abrams', 'staceyabrams'),
        ('Michael Bennet', 'SenatorBennet'),
        ('Joe Biden', 'JoeBiden'),
        ('Steve Bullock', 'GovernorBullock'),
        ('Andrew Cuomo', 'NYGovCuomo'),
        ('Bill de Blasio', 'BilldeBlasio'),
        ('Terry McAuliffe', 'TerryMcAuliffe'),
        ('Seth Moulton', 'sethmoulton'),
        ('Beto O\'Rourke', 'betoorourke'),
        ('Eric Swalwell', 'RepSwalwell'),
        ('Mike Gravel', 'MikeGravel'),
    )
}
@starting_task
def load_urls():
    """Schedule one 'scrape_candidate' task for every entry in URLS.

    Each call to ``schedule`` receives the task name 'scrape_candidate'
    and a ``(candidate, url)`` tuple taken from the URLS mapping.
    """
    for name, profile_url in URLS.items():
        schedule('scrape_candidate', (name, profile_url))
@subtask
def scrape_candidate(candidate, candidate_url):
    """Record the follower count shown on a candidate's profile page.

    Fetches ``candidate_url``, locates the followers navigation item,
    reads the ``data-count`` attribute of its value span, and saves one
    row to 'dem_candidates' holding both the raw string and its integer
    form.

    Args:
        candidate: Display name stored with the saved row.
        candidate_url: Profile page URL to fetch.

    Returns:
        None. Returns early, without fetching, once ``get_attempts()``
        has reached the attempt limit.

    Raises:
        IndexError: If either xpath lookup matches nothing (assumes
            ``xpath`` returns a list, lxml-style -- TODO confirm).
        KeyError: If the value span has no ``data-count`` attribute.
        ValueError: If the ``data-count`` value is not an integer string.
    """
    attempt_limit = 5  # only retry 5 times
    if get_attempts() >= attempt_limit:
        return

    dom = requests.get_dom_from_content(candidate_url)

    # NOTE(review): selectors depend on the profile page's markup at the
    # time of writing; a failed lookup raises, which presumably lets the
    # framework retry the task -- confirm against readypipe's behaviour.
    follower_items = dom.xpath(
        '//*/li[@class="ProfileNav-item ProfileNav-item--followers"]'
    )
    value_spans = follower_items[0].xpath(
        'descendant::span[@class="ProfileNav-value"]'
    )
    raw_count = value_spans[0].attrib['data-count']

    record = {
        'candidate': candidate,
        'candidate_url': candidate_url,
        'followers_string': raw_count,
        'followers': int(raw_count),
    }
    save('dem_candidates', record)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment