Skip to content

Instantly share code, notes, and snippets.

@BlackArbsCEO
Created May 20, 2017 15:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save BlackArbsCEO/1dcdd45f4518dbbcfd9cb539be239b37 to your computer and use it in GitHub Desktop.
Save BlackArbsCEO/1dcdd45f4518dbbcfd9cb539be239b37 to your computer and use it in GitHub Desktop.
import asyncio
import aiohttp
# ================================================
# for first run only
class first_async_scraper:
    """First-pass scraper: fetch the default option chain for many symbols.

    One POST request is issued per symbol, all sharing a single
    aiohttp.ClientSession. The resulting payloads are what the `expirys`
    class later inspects for each symbol's expiration dates.
    """

    def __init__(self):
        pass

    @staticmethod
    def _build_headers(symbol, user_agent):
        """fn: to build a fresh request-header dict for one symbol

        A new dict is returned on every call so that concurrently pending
        requests never share (and overwrite) each other's Referer.

        Params:
            symbol : str(), ETF
            user_agent : str()
        Returns:
            headers : dict() of HTTP request headers
        """
        return {
            "Accept": "application/json",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            # no spaces around '=' in the q-value, matching xp_async_scraper
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
            "Host": "core-api.barchart.com",
            "Origin": "https://www.barchart.com",
            "Referer": "https://www.barchart.com/etfs-funds/quotes/{}/options".format(symbol),
            "User-Agent": user_agent,
        }

    async def _fetch(self, symbol, url, session, headers):
        """fn: to retrieve option quotes as JSON

        Params:
            symbol : str(), ETF
            url : str(), request url containing one `{}` slot for the symbol
            session : aiohttp.ClientSession() object
            headers : dict() containing header info
        Returns:
            response : JSON/Python Dict
        """
        async with session.post(url.format(symbol), headers=headers) as response:
            # content_type=None: decode the body as JSON without validating
            # the response's Content-Type header
            return await response.json(content_type=None)

    async def run(self, symbols, user_agent):
        """fn: to aggregate response option quotes

        Params:
            symbols : list of str(), ETF symbols
            user_agent : str()
        Returns:
            responses : list of JSON, in the same order as `symbols`
                        (empty list when `symbols` is empty)
        """
        url = 'https://core-api.barchart.com/v1/options/chain?symbol={}&fields=strikePrice%2ClastPrice%2CpercentFromLast%2CbidPrice%2Cmidpoint%2CaskPrice%2CpriceChange%2CpercentChange%2Cvolatility%2Cvolume%2CopenInterest%2CoptionType%2CdaysToExpiration%2CexpirationDate%2CsymbolCode%2CsymbolType&groupBy=optionType&raw=1&meta=field.shortName%2Cfield.type%2Cfield.description'

        symbols = list(symbols)
        if not symbols:
            # nothing to request, so don't open a session at all
            return []

        async with aiohttp.ClientSession() as session:
            tasks = [
                asyncio.ensure_future(
                    self._fetch(symbol, url, session,
                                self._build_headers(symbol, user_agent))
                )
                for symbol in symbols
            ]
            # gather returns responses in original order not arrival order
            # https://docs.python.org/3/library/asyncio-task.html#task-functions
            responses = await asyncio.gather(*tasks)
        return responses
# ================================================
class expirys:
    """Pair each ETF symbol with the expiration dates found in its response."""

    def __init__(self, ETFS, first_future_result):
        """Class to extract expiration data from Dict

        Params:
            ETFS : list of ETF symbol str()
            first_future_result : list of response objects (dict/JSON) from
                                  the first scraper, in the same order as ETFS
        """
        self.ETFS = ETFS
        self.first_future_result = first_future_result

    def _get_dict_expiry(self, response):
        """fn: to get expirations from response dict

        Params:
            response : dict/JSON object (or None when no response exists)
        Returns:
            list() of date str(), "YYYY-MM-DD", or None when the response is
            missing, is not a dict, reports a zero/absent 'count', or carries
            no 'meta' -> 'expirations' entry
        """
        # A padded (None) or otherwise non-dict response has nothing to read.
        if not isinstance(response, dict):
            return None
        # Payloads without a 'count' key (e.g. an error body) used to raise
        # KeyError here; treat them like an empty result instead.
        if not response.get('count'):
            return None
        meta = response.get('meta') or {}
        return meta.get('expirations')

    def get_expirys(self):
        """fn: to create dict with k, v = symbol, list of expirys

        we have to do this b/c JSON/dict response data doesn't
        contain symbol identifier

        Returns:
            dict(symbol = list of expiry dates); a symbol whose response is
            missing or unusable maps to None
        """
        from itertools import zip_longest

        # we can pair by position because results are in order of submission
        # not arrival: gather returns responses in original order
        # https://docs.python.org/3/library/asyncio-task.html#task-functions
        # zip_longest pads the shorter input with None, which
        # _get_dict_expiry maps to None.
        return {
            symbol: self._get_dict_expiry(resp)
            for symbol, resp in zip_longest(self.ETFS, self.first_future_result)
        }
# ================================================
# async scrape for one symbol: one request per expiration date
class xp_async_scraper:
    """Fetch one symbol's option chain for each of its expiration dates.

    One POST request is issued per expiration date, all sharing a single
    aiohttp.ClientSession.
    """

    def __init__(self):
        pass

    async def _xp_fetch(self, symbol, expiry, url, session, headers):
        """fn: to retrieve option quotes as JSON

        Params:
            symbol : str(), ETF
            expiry : str(), "YYYY-MM-DD"
            url : str(), request url with `{}` slots for symbol and expiry
            session : aiohttp.ClientSession() object
            headers : dict() containing header info
        Returns:
            response : JSON/Python Dict
        """
        async with session.post(url.format(symbol, expiry), headers=headers) as response:
            # content_type=None: decode the body as JSON without validating
            # the response's Content-Type header
            return await response.json(content_type=None)

    async def xp_run(self, symbol, expirys, user_agent):
        """fn: to aggregate response option quotes

        Params:
            symbol : str(), ETF
            expirys : list of date str() "YYYY-MM-DD"; None or an empty list
                      means there is nothing to fetch
            user_agent : str()
        Returns:
            responses : list of JSON, in the same order as `expirys`
                        (empty list when there are no expirations)
        """
        url = "https://core-api.barchart.com/v1/options/chain?symbol={}&fields=strikePrice%2ClastPrice%2CpercentFromLast%2CbidPrice%2Cmidpoint%2CaskPrice%2CpriceChange%2CpercentChange%2Cvolatility%2Cvolume%2CopenInterest%2CoptionType%2CdaysToExpiration%2CexpirationDate%2CsymbolCode%2CsymbolType&groupBy=optionType&expirationDate={}&raw=1&meta=field.shortName%2Cfield.type%2Cfield.description"

        # A None expiry list used to raise TypeError in the loop below;
        # return an empty result without opening a session.
        if expirys is None:
            return []
        expirys = list(expirys)
        if not expirys:
            return []

        # The symbol is the same for every request, so the headers
        # (including the symbol-specific Referer) are built once.
        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
            "Host": "core-api.barchart.com",
            "Origin": "https://www.barchart.com",
            "Referer": "https://www.barchart.com/etfs-funds/quotes/{}/options".format(symbol),
            "User-Agent": user_agent,
        }

        async with aiohttp.ClientSession() as session:
            tasks = [
                asyncio.ensure_future(
                    self._xp_fetch(symbol, expiry, url, session, headers)
                )
                for expiry in expirys
            ]
            # gather returns responses in original order not arrival order
            # https://docs.python.org/3/library/asyncio-task.html#task-functions
            responses = await asyncio.gather(*tasks)
        return responses
# ================================================
# asynchronously fetch the HTML page source for each symbol
class last_price_scraper:
    """Download the barchart options page of each symbol as text."""

    def __init__(self):
        pass

    async def _fetch(self, symbol, url, session):
        """fn: to retrieve one symbol's page as text

        Params:
            symbol : str(), ETF
            url : str(), request url containing one `{}` slot for the symbol
            session : aiohttp.ClientSession() object
        Returns:
            response : text object (body of the GET response)
        """
        target = url.format(symbol)
        async with session.get(target) as page:
            body = await page.text()
        return body

    async def run(self, symbols):
        """fn: to aggregate the page text of every symbol

        Params:
            symbols : list of str(), ETF symbols
        Returns:
            responses : list of text, in the same order as `symbols`
        """
        url = 'https://www.barchart.com/stocks/quotes/{}/options'
        async with aiohttp.ClientSession() as session:
            pending = [
                asyncio.ensure_future(self._fetch(ticker, url, session))
                for ticker in symbols
            ]
            # gather returns responses in original order not arrival order
            # https://docs.python.org/3/library/asyncio-task.html#task-functions
            return await asyncio.gather(*pending)
@andrewmakarskiy
Copy link

It seems Barchart changed the way they handle requests. I tried to run your script for just one ticker, AAPL, and got this error:

Async Barchart Scraper starting...

price scraper script run time:  0 days 00:00:02.408933

first async scraper script run time:  0 days 00:00:01.772125
Traceback (most recent call last):
  File "async_barchart_option_scraper.py", line 76, in <module>
    expirys = xp.get_expirys()
  File "/Users/tt3/Documents/R/OOR-Stocks/Python/BlackArbsCEO/async_option_scraper.py", line 91, in get_expirys
    expirys[symbol] = self._get_dict_expiry(resp)
  File "/Users/tt3/Documents/R/OOR-Stocks/Python/BlackArbsCEO/async_option_scraper.py", line 73, in _get_dict_expiry
    if response['count'] == 0:
KeyError: 'count'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment