Skip to content

Instantly share code, notes, and snippets.

@akshaykarnawat
Created February 9, 2019 20:55
Show Gist options
  • Save akshaykarnawat/2f76964957de63ee5d5383e0c8aa7ee3 to your computer and use it in GitHub Desktop.
Getting SDR data from DTCC public repository
# the structure of the code needs to change and the script needs to be parameterized with functions
# but gets the job done for the data needed
import io
import zipfile
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
def getParamString():
    """Return the current UTC time as epoch milliseconds, as a 13-digit string.

    DTCC's cache-busting ``_`` query parameter expects a 13-character
    millisecond timestamp. The original built it by slicing the string form
    of ``total_seconds()``, which depends on platform float formatting (the
    author's own comment flags this breaking on Windows); integer arithmetic
    produces the same 13-digit value deterministically.
    """
    epoch_ms = int((datetime.utcnow() - datetime(1970, 1, 1)).total_seconds() * 1000)
    return str(epoch_ms)
def main():
    """Download SDR cumulative-slice data from the DTCC public repository.

    Walks the GTR landing page to find the cumulative-slice iframe, follows
    it to enumerate the per-grid pages, downloads every zip archive they
    link to, and reads each CSV inside into a pandas DataFrame.

    Returns the list of DataFrames (one per CSV found), or None if the
    landing page could not be fetched.

    Fixes versus the pasted original: ``return`` sat at module level (a
    SyntaxError), ``io`` was used without being imported, one response was
    never closed, and ``df`` was overwritten on every iteration so only the
    last CSV survived.
    """
    url = 'https://rtdata.dtcc.com/gtr'
    res = requests.get(url=url)
    if res.status_code != 200:
        return None
    # reuse the landing page's cookies/headers on the follow-up requests
    cookies = res.cookies
    headers = res.headers
    soup = BeautifulSoup(res.text, 'html.parser')
    # the iframe's src points at the cumulative-slice grid page
    urlToCall = soup.find(
        'iframe', attrs={'class': 'gridBody cumulativeSliceGrid'}).get('src', '')
    res.close()

    # subsequent call: fetch the cumulative page to enumerate the data grids
    res = requests.get(url=urlToCall, params={'_': getParamString()},
                       cookies=cookies, headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    dataset = soup.findAll('div', attrs={'class': 'trackerGrid pollingGrid'})
    res.close()

    frames = []
    # make calls to the actual data available
    for d in dataset:
        print('Getting data for', d.get('id', ''))
        durl = urljoin(urlToCall, d.get('sourcefile', ''))
        res = requests.get(url=durl, params={'_': getParamString()},
                           cookies=cookies, headers=headers)
        soup = BeautifulSoup(res.text, 'html.parser')
        res.close()
        ddata = [a.get('href') for a in soup.findAll('a')]
        # each link is a zip file; download it and load every CSV inside
        for dd in ddata:
            res = requests.get(dd)
            content = res.content
            res.close()  # was leaked in the original
            z = zipfile.ZipFile(io.BytesIO(content))
            for f in z.filelist:
                if '.csv' in f.filename:
                    # collect instead of clobbering a single `df` variable
                    frames.append(pd.read_csv(z.open(f)))
    return frames


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment