Chad Horner (chadmhorner) · YipitData · New York
from readypipe import requests, starting_task, subtask, schedule, save, get_attempts
URLS = {
    'Andrew Yang': 'https://twitter.com/andrewyang',
    'Bernie Sanders': 'https://twitter.com/BernieSanders',
    'Elizabeth Warren': 'https://twitter.com/ewarren',
    'Cory Booker': 'https://twitter.com/CoryBooker',
    'Kamala Harris': 'https://twitter.com/KamalaHarris',
    'Pete Buttigieg': 'https://twitter.com/PeteButtigieg',
    'Julian Castro': 'https://twitter.com/JulianCastro',
    # ...remaining candidates are cut off in the gist preview
}
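The preview cuts off inside the URL table. A minimal sketch of how the tasks might continue, assuming schedule(task_name, args) queues a named subtask and get_attempts() returns the current retry count; the follower-count xpath is illustrative only and not taken from the gist.

@starting_task
def load_candidates():
    for name, url in URLS.items():
        schedule('scrape_profile', (name, url))  # assumed schedule(task_name, args) signature

@subtask
def scrape_profile(name, url):
    dom = requests.get_dom_from_content(url)
    # illustrative xpath only; Twitter's markup is not shown in the gist and changes often
    followers = dom.xpath('//a[contains(@href, "followers")]//@data-count')
    save('twitter_followers', {
        'candidate': name,
        'followers': int(followers[0]) if followers else None,
        'attempt': get_attempts(),  # assumed to return the retry count for this task
    })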
from readypipe import requests, starting_task, subtask, schedule, save
JOBS_URL = 'https://www.amazon.jobs/en/search.json?base_query=&city=&country=&county=&facets%5B%5D=location&facets%5B%5D=business_category&facets%5B%5D=category&facets%5B%5D=schedule_type_id&facets%5B%5D=employee_class&facets%5B%5D=normalized_location&facets%5B%5D=job_function_id&latitude=&loc_group_id=&loc_query=&location%5B%5D=newyork&longitude=&offset=10&query_options=&radius=24km&region=&result_limit=10&sort=relevant'
@starting_task
def get_jobs():
    jobs = requests.get_json_from_content(JOBS_URL)
    data = {}
    data['num_jobs'] = jobs.get('hits', None)
    save('amazon_jobs', data)
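The task above only records the total hit count. If individual postings were wanted, the same endpoint could be paged via its offset parameter; a sketch under the assumption that the response also carries a 'jobs' list, whose 'title', 'location', and 'posted_date' fields are guesses rather than anything confirmed by the gist.

@starting_task
def get_job_listings():
    first_page = requests.get_json_from_content(JOBS_URL)
    total = first_page.get('hits') or 0
    for offset in range(0, total, 10):  # result_limit=10 in JOBS_URL, so page in steps of 10
        page = requests.get_json_from_content(JOBS_URL.replace('offset=10', 'offset=%d' % offset))
        for job in page.get('jobs', []):  # the 'jobs' key and its fields are assumptions
            save('amazon_job_postings', {
                'title': job.get('title'),
                'location': job.get('location'),
                'posted_date': job.get('posted_date'),
            })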
from readypipe import requests, starting_task, subtask, schedule, save, open_browser
from json import loads
API_KEY = 'your_api_key_here'
BASE_URL = 'https://dev.virtualearth.net/REST/v1/Routes/driving?key=' + API_KEY + '&o=json&jsonp=Microsoft.Maps.NetworkCallbacks.f83b5e&c=en-US&fi=true&errorDetail=true&wp.0=%s&wp.1=%s&ra=routeproperties,routepath&optmz=timeWithTraffic&du=mi&tt=departure&maxSolns=3&rpo=Points'
AIRPORT_COORDINATES = {
    'JFK': '40.643391,-73.781937',
    'EWR': '40.692150,-74.181557',
    'LGA': '40.773415,-73.870674',
}
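The preview stops at the coordinate table. A sketch of how the route endpoint might be queried for each airport: the origin coordinate is a made-up placeholder, the plain requests library stands in because the gist's raw-text fetch is not shown, and the resourceSets/resources layout is the usual Bing Maps Routes response shape rather than anything confirmed by the gist.

import re
import requests as std_requests  # plain requests, used only because the gist's raw-text helper isn't shown

ORIGIN = '40.748400,-73.985700'  # placeholder origin (Midtown Manhattan), not from the gist

@starting_task
def airport_drive_times():
    for airport, coords in AIRPORT_COORDINATES.items():
        raw = std_requests.get(BASE_URL % (ORIGIN, coords), timeout=30).text
        # strip the JSONP wrapper (Microsoft.Maps.NetworkCallbacks.f83b5e(...)) before parsing
        body = loads(re.search(r'\((.*)\)', raw, re.S).group(1))
        # resourceSets/resources is the usual Bing Routes layout; not confirmed by the gist
        for rs in body.get('resourceSets', []):
            for route in rs.get('resources', []):
                save('airport_drive_times', {
                    'airport': airport,
                    'seconds_with_traffic': route.get('travelDurationTraffic'),
                })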
from readypipe import requests, starting_task, subtask, schedule, save, save_many
BASE_URL = 'https://www.oddschecker.com'
URLS = {
    'Roma': 'https://www.oddschecker.com/awards/oscars/best-picture/bet-history/roma',
    'Cuaron': 'https://www.oddschecker.com/awards/oscars/best-director/bet-history/alfonso-cuaron',
    'Malek': 'https://www.oddschecker.com/awards/oscars/best-actor/bet-history/rami-malek',
    'Close': 'https://www.oddschecker.com/awards/oscars/best-actress/bet-history/glenn-close',
    'Ali': 'https://www.oddschecker.com/awards/oscars/best-supporting-actor/bet-history/mahershala-ali',
    'King': 'https://www.oddschecker.com/awards/oscars/best-supporting-actress/bet-history/regina-king',
}
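The preview ends at the URL table. A sketch of a follow-on task, assuming save_many(table, rows) accepts a table name and a list of row dicts (the signature is not shown in the gist) and using an illustrative, unverified xpath for the bet-history rows.

@starting_task
def get_odds_history():
    for nominee, url in URLS.items():
        dom = requests.get_dom_from_content(url)
        rows = []
        # the xpath below is a guess at the bet-history table layout, not taken from the gist
        for tr in dom.xpath('//table//tr[td]'):
            cells = [''.join(td.itertext()).strip() for td in tr.xpath('./td')]
            rows.append({'nominee': nominee, 'row': ' | '.join(cells)})
        save_many('oscar_odds_history', rows)  # assumed save_many(table, list_of_rows) signature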
from readypipe import requests, starting_task, subtask, schedule, save
API_KEY = 'your_api_key_here'
SERVICE_URL = 'https://collector-otp-prod.camsys-apps.com/realtime/serviceStatus?apikey=' + API_KEY
@starting_task
def check_routes():
    json_data = requests.get_json_from_content(SERVICE_URL)
    last_updated = json_data.get('lastUpdated')
    route_details = json_data.get('routeDetails')
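    # --- Hypothetical continuation; the gist preview stops at routeDetails. ---
    # Assumes routeDetails is a list of per-route dicts; the 'route' and
    # 'statusDetails' field names below are guesses, not taken from the gist.
    for route in (route_details or []):
        save('mta_service_status', {
            'last_updated': last_updated,
            'route': route.get('route'),
            'status': route.get('statusDetails'),
        })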
from readypipe import requests, starting_task, subtask, schedule, save
BASE_URL = 'https://www.pro-football-reference.com'
SCOREBOARD_URL = 'https://www.pro-football-reference.com/years/2018/week_%s.htm'
@starting_task
def sweep_game_urls():
    for i in range(1, 18): #weeks 1 through 17
        week = requests.get_dom_from_content(SCOREBOARD_URL % i) #visit week scoreboard
        games = week.xpath('//*/td[@class="right gamelink"]') #get all game links
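        # --- Hypothetical continuation; the gist preview stops here. ---
        # Assumes schedule(task_name, args) queues a named subtask; the
        # signature is inferred from the imports, not shown in the gist.
        for game in games:
            href = game.xpath('.//a/@href')  # each gamelink cell wraps a link to the boxscore page
            if href:
                schedule('parse_game', (BASE_URL + href[0],))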
from readypipe import requests, starting_task, subtask, schedule, save, schedule_many, open_browser
from json import loads
from lxml import etree
import datetime
from pandas import read_csv
from requests.exceptions import ConnectTimeout
SEARCH_URL = 'https://www.fandango.com/%s_movietimes?mode=general&q=%s'
JSON_ENDPOINT = 'https://www.fandango.com/napi/theaterswithshowtimes?zipCode=%s&city=&state=&date=%s&page=1&favTheaterOnly=false&limit=10&isdesktop=true'
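The preview stops before any task is defined. Judging by the imports, the job likely reads ZIP codes from a CSV and hits the showtimes endpoint for each, handling ConnectTimeout along the way. A minimal sketch under those assumptions; the CSV name and column, the endpoint's date format, the schedule_many signature, and the 'theaters'/'name' response fields are all placeholders, not taken from the gist.

@starting_task
def load_zip_codes():
    zips = read_csv('zip_codes.csv')  # placeholder file name and column layout
    date = datetime.datetime.today().strftime('%m-%d-%Y')  # the endpoint's date format is a guess
    schedule_many('get_showtimes', [(str(z), date) for z in zips['zip']])  # assumed schedule_many signature

@subtask
def get_showtimes(zip_code, date):
    try:
        data = requests.get_json_from_content(JSON_ENDPOINT % (zip_code, date))
    except ConnectTimeout:
        return  # give up on this ZIP after a timeout; the real job may retry instead
    for theater in data.get('theaters', []):  # the 'theaters' key and 'name' field are assumptions
        save('fandango_theaters', {'zip': zip_code, 'date': date, 'name': theater.get('name')})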
chadmhorner / spotify_charts.py
Created January 24, 2019 16:48
Scrape Spotify Charts using ReadyPipe
from readypipe import requests, starting_task, subtask, schedule, save, schedule_many
import datetime
CHART_URL = 'https://spotifycharts.com/regional/%s/daily/%s'
@starting_task
def load_todays_urls():
    chart_page = requests.get_dom_from_content(CHART_URL % ('global', 'latest')) #visit latest page
    countries_set = set()
    yesterday = (datetime.datetime.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d') #get yesterday's date
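    # --- Hypothetical continuation; the gist preview ends here. ---
    # Assumes the country picker exposes two-letter codes via data-value attributes
    # and that schedule_many takes a task name plus a list of argument tuples;
    # neither detail is shown in the gist.
    for code in chart_page.xpath('//li[@data-value]/@data-value'):
        countries_set.add(code)
    schedule_many('scrape_chart', [(c, yesterday) for c in countries_set])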