
@vcgato29
Forked from akshaykarnawat/dtcc.py
Created October 23, 2019 16:58
Getting SDR data from DTCC public repository
# the structure of the code needs to change and the script needs to be parameterized with func...
# but gets the job done for the data needed
import io
import zipfile
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


def getParamString():
    # this might fail on Windows as you can't convert a float to a string
    # with more than 2 decimal places; the way DTCC sets its query param it needs 13 chars
    # for now...
    return str((datetime.utcnow() - datetime(1970, 1, 1)).total_seconds()).replace('.', '')[:13]


# load the DTCC GTR landing page and keep its cookies/headers for the later calls
url = 'https://rtdata.dtcc.com/gtr'
res = requests.get(url=url)
cookies = res.cookies
headers = res.headers
if res.status_code != 200:
    # a bare `return` here would be a SyntaxError at module level, so exit the script instead
    raise SystemExit('could not load ' + url)

soup = BeautifulSoup(res.text, 'html.parser')
urlToCall = soup.find('iframe',
                      attrs={'class': 'gridBody cumulativeSliceGrid'}).get('src', '')
res.close()

# subsequent call
# call urlToCall to get the details for the cumulative slice grid
res = requests.get(url=urlToCall, params={'_': getParamString()},
                   cookies=cookies, headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
dataset = soup.findAll('div', attrs={'class': 'trackerGrid pollingGrid'})
res.close()

# make calls to the actual data available
for d in dataset:
    print('Getting data for', d.get('id', ''))
    durl = urljoin(urlToCall, d.get('sourcefile', ''))
    res = requests.get(url=durl, params={'_': getParamString()},
                       cookies=cookies, headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    res.close()
    ddata = [a.get('href') for a in soup.findAll('a')]

    # now we can loop through each of the links in ddata to get the zip file,
    # download the data in there, and convert it to a pandas dataframe
    for dd in ddata:
        res = requests.get(dd)
        content = res.content
        z = zipfile.ZipFile(io.BytesIO(content))
        res.close()
        for f in z.filelist:
            if '.csv' in f.filename:
                df = pd.read_csv(z.open(f))
                # do your thing -- pandas the * out of the data
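
The last loop reads each CSV into its own DataFrame and leaves the processing open. If the goal is one combined table, a possible follow-up (a sketch, not part of the original gist; frames_from_zip_links is a made-up helper name, and it assumes the per-slice CSVs share a column layout) is to collect the frames and concatenate them once every archive has been read:

# Sketch (not from the gist): gather every CSV across the downloaded ZIPs
# into a list of DataFrames, then concatenate them into one table.
import io
import zipfile

import pandas as pd
import requests


def frames_from_zip_links(zip_links):
    frames = []
    for link in zip_links:
        res = requests.get(link)
        with zipfile.ZipFile(io.BytesIO(res.content)) as z:
            for f in z.filelist:
                if f.filename.endswith('.csv'):
                    frames.append(pd.read_csv(z.open(f)))
        res.close()
    return frames


# e.g. combined = pd.concat(frames_from_zip_links(ddata), ignore_index=True)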

xrsrz commented Oct 5, 2020

if res.status_code != 200:
    return

That causes a syntax error: return outside a function?
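
A bare return is only legal inside a function body, so at module level Python raises SyntaxError: 'return' outside function. One minimal way to keep the early exit (a sketch, not from the gist; main is just a placeholder name) is to wrap the script body in a function so the return becomes valid:

import requests

def main():
    res = requests.get('https://rtdata.dtcc.com/gtr')
    if res.status_code != 200:
        return  # legal here: we are inside a function
    # ... rest of the scraping and download logic goes here ...

if __name__ == '__main__':
    main()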
