# chadmhorner/twitter_followers.py

## twitter_followers.py
from readypipe import requests, starting_task, subtask, schedule, save, get_attempts

# Candidate display name -> Twitter profile URL.
# Written as (name, url) pairs and turned into a dict in one step.
URLS = dict((
    ("Andrew Yang", "https://twitter.com/andrewyang"),
    ("Bernie Sanders", "https://twitter.com/BernieSanders"),
    ("Elizabeth Warren", "https://twitter.com/ewarren"),
    ("Cory Booker", "https://twitter.com/CoryBooker"),
    ("Kamala Harris", "https://twitter.com/KamalaHarris"),
    ("Pete Buttigieg", "https://twitter.com/PeteButtigieg"),
    ("Julian Castro", "https://twitter.com/JulianCastro"),
    ("John Delaney", "https://twitter.com/JohnDelaney"),
    ("Tulsi Gabbard", "https://twitter.com/TulsiGabbard"),
    ("Kirsten Gillibrand", "https://twitter.com/SenGillibrand"),
    ("John Hickenlooper", "https://twitter.com/Hickenlooper"),
    ("Jay Inslee", "https://twitter.com/JayInslee"),
    ("Amy Klobuchar", "https://twitter.com/amyklobuchar"),
    ("Marianne Williamson", "https://twitter.com/marwilliamson"),
    ("Stacey Abrams", "https://twitter.com/staceyabrams"),
    ("Michael Bennet", "https://twitter.com/SenatorBennet"),
    ("Joe Biden", "https://twitter.com/JoeBiden"),
    ("Steve Bullock", "https://twitter.com/GovernorBullock"),
    ("Andrew Cuomo", "https://twitter.com/NYGovCuomo"),
    ("Bill de Blasio", "https://twitter.com/BilldeBlasio"),
    ("Terry McAuliffe", "https://twitter.com/TerryMcAuliffe"),
    ("Seth Moulton", "https://twitter.com/sethmoulton"),
    ("Beto O'Rourke", "https://twitter.com/betoorourke"),
    ("Eric Swalwell", "https://twitter.com/RepSwalwell"),
    ("Mike Gravel", "https://twitter.com/MikeGravel"),
))

@starting_task
def load_urls():
    """Schedule one ``scrape_candidate`` job for every entry in ``URLS``.

    Each job is scheduled with a ``(candidate, url)`` argument tuple.
    """
    for name, profile_url in URLS.items():
        schedule('scrape_candidate', (name, profile_url))

@subtask
def scrape_candidate(candidate, candidate_url):
    """Read a candidate's follower count from their profile page and save it.

    The count is taken from the ``data-count`` attribute of the value span
    inside the followers item of the profile nav. One row is saved to
    ``dem_candidates`` holding both the raw string and its ``int`` form.

    Returns without doing anything once ``get_attempts()`` reports 5 or
    more attempts.
    """
    attempt_limit = 5  # stop trying once this many attempts are reported
    if get_attempts() >= attempt_limit:
        return

    dom = requests.get_dom_from_content(candidate_url)

    # Take the first match at each step; presumably a missing element makes
    # the [0] lookup raise -- TODO confirm what xpath() returns here.
    followers_item = dom.xpath(
        '//*/li[@class="ProfileNav-item ProfileNav-item--followers"]'
    )[0]
    value_span = followers_item.xpath(
        'descendant::span[@class="ProfileNav-value"]'
    )[0]
    raw_count = value_span.attrib['data-count']

    record = {
        'candidate': candidate,
        'candidate_url': candidate_url,
        'followers_string': raw_count,
        'followers': int(raw_count),
    }
    save('dem_candidates', record)
	from readypipe import requests, starting_task, subtask, schedule, save, get_attempts

	# Candidate display name -> Twitter profile URL.
	# Written as (name, url) pairs and turned into a dict in one step.
	URLS = dict((
	    ("Andrew Yang", "https://twitter.com/andrewyang"),
	    ("Bernie Sanders", "https://twitter.com/BernieSanders"),
	    ("Elizabeth Warren", "https://twitter.com/ewarren"),
	    ("Cory Booker", "https://twitter.com/CoryBooker"),
	    ("Kamala Harris", "https://twitter.com/KamalaHarris"),
	    ("Pete Buttigieg", "https://twitter.com/PeteButtigieg"),
	    ("Julian Castro", "https://twitter.com/JulianCastro"),
	    ("John Delaney", "https://twitter.com/JohnDelaney"),
	    ("Tulsi Gabbard", "https://twitter.com/TulsiGabbard"),
	    ("Kirsten Gillibrand", "https://twitter.com/SenGillibrand"),
	    ("John Hickenlooper", "https://twitter.com/Hickenlooper"),
	    ("Jay Inslee", "https://twitter.com/JayInslee"),
	    ("Amy Klobuchar", "https://twitter.com/amyklobuchar"),
	    ("Marianne Williamson", "https://twitter.com/marwilliamson"),
	    ("Stacey Abrams", "https://twitter.com/staceyabrams"),
	    ("Michael Bennet", "https://twitter.com/SenatorBennet"),
	    ("Joe Biden", "https://twitter.com/JoeBiden"),
	    ("Steve Bullock", "https://twitter.com/GovernorBullock"),
	    ("Andrew Cuomo", "https://twitter.com/NYGovCuomo"),
	    ("Bill de Blasio", "https://twitter.com/BilldeBlasio"),
	    ("Terry McAuliffe", "https://twitter.com/TerryMcAuliffe"),
	    ("Seth Moulton", "https://twitter.com/sethmoulton"),
	    ("Beto O'Rourke", "https://twitter.com/betoorourke"),
	    ("Eric Swalwell", "https://twitter.com/RepSwalwell"),
	    ("Mike Gravel", "https://twitter.com/MikeGravel"),
	))

	@starting_task
	def load_urls():
	    """Schedule one ``scrape_candidate`` job for every entry in ``URLS``.

	    Each job is scheduled with a ``(candidate, url)`` argument tuple.
	    """
	    # Body indentation restored: the loop belongs to the function and the
	    # schedule() call belongs to the loop.
	    for candidate, candidate_url in URLS.items():
	        schedule('scrape_candidate', (candidate, candidate_url))

	@subtask
	def scrape_candidate(candidate, candidate_url):
	    """Read a candidate's follower count from their profile page and save it.

	    The count is taken from the ``data-count`` attribute of the value span
	    inside the followers item of the profile nav. One row is saved to
	    ``dem_candidates`` holding both the raw string and its ``int`` form.

	    Returns without doing anything once ``get_attempts()`` reports
	    ``MAX_ATTEMPTS`` or more attempts.
	    """
	    MAX_ATTEMPTS = 5  # only retry 5 times
	    if get_attempts() >= MAX_ATTEMPTS:
	        return

	    page = requests.get_dom_from_content(candidate_url)
	    # Take the first match at each step; presumably a missing element makes
	    # the [0] lookup raise -- TODO confirm what xpath() returns here.
	    followers = page.xpath('//*/li[@class="ProfileNav-item ProfileNav-item--followers"]')[0]
	    span = followers.xpath('descendant::span[@class="ProfileNav-value"]')[0]
	    _follower_count = span.attrib['data-count']
	    save('dem_candidates', {
	        'candidate': candidate,
	        'candidate_url': candidate_url,
	        'followers_string': _follower_count,
	        'followers': int(_follower_count),
	    })