Getting SDR data from DTCC public repository
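A short scraper that loads the DTCC GTR real-time dashboard, follows the cumulative-slice iframe, and pulls each dataset's zipped CSV slices down into pandas DataFrames.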
# the structure of the code needs to change and the script needs to be parameterized with func...
# but it gets the job done for the data needed
import io  # needed for io.BytesIO when unpacking the downloaded zips below
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import requests
import zipfile
def getParamString():
    # this might fail on Windows as you can't convert a float to a string
    # with more than 2 decimal places; the way DTCC sets its query param,
    # it needs 13 characters -- for now...
    return str((datetime.utcnow() - datetime(1970, 1, 1)).total_seconds()).replace('.', '')[:13]
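# getParamString() yields the current Unix time as a 13-digit millisecond
# string, e.g. '1700000000123' -- the same cache-busting '_' parameter that
# jQuery appends to AJAX requests, which is presumably what the DTCC grid expects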
url = 'https://rtdata.dtcc.com/gtr'
res = requests.get(url=url)
cookies = res.cookies
headers = res.headers
if res.status_code != 200:
    raise SystemExit('failed to fetch %s (HTTP %d)' % (url, res.status_code))
soup = BeautifulSoup(res.text, 'html.parser')
urlToCall = soup.find('iframe',
                      attrs={'class': 'gridBody cumulativeSliceGrid'}).get('src', '')
res.close()
# subsequent call: hit urlToCall to get the details surrounding the cumulative data
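# reusing the cookies and headers from the landing page keeps the follow-up
# requests in the same session the landing page established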
res = requests.get(url=urlToCall, params={'_': getParamString()},
                   cookies=cookies, headers=headers)
soup = BeautifulSoup(res.text, 'html.parser')
dataset = soup.findAll('div', attrs={'class': 'trackerGrid pollingGrid'})
res.close()
# make calls to the actual data available
for d in dataset:
    print('Getting data for', d.get('id', ''))
    durl = urljoin(urlToCall, d.get('sourcefile', ''))
    res = requests.get(url=durl, params={'_': getParamString()},
                       cookies=cookies, headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    res.close()
    ddata = [a.get('href') for a in soup.findAll('a')]
    # now we can loop through each of the links in ddata to get the zip file,
    # download the data in there, and convert it to a pandas dataframe
    for dd in ddata:
        res = requests.get(dd)
        content = res.content
        z = zipfile.ZipFile(io.BytesIO(content))
        res.close()
        for f in z.filelist:
            if '.csv' in f.filename:
                df = pd.read_csv(z.open(f))
                # do your thing -- pandas the * out of the data
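As written, the innermost loop overwrites df on every CSV and throws the result away. A minimal sketch of how that last block could collect everything into one frame instead; the frames list, the source_file column, and the all_data name are additions for illustration, not part of the original:

frames = []
for f in z.filelist:
    if '.csv' in f.filename:
        df = pd.read_csv(z.open(f))
        df['source_file'] = f.filename  # hypothetical: tag each row with its slice
        frames.append(df)

if frames:
    # one DataFrame across all CSV slices in this zip
    all_data = pd.concat(frames, ignore_index=True)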
if res.status_code != 200:
    return

Doesn't that cause a syntax error, a return outside a function?
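It would: a bare return is only legal inside a function body, so at module level Python stops with "SyntaxError: 'return' outside function" before anything runs. Keeping the flat script layout, one fix is to raise SystemExit instead; wrapping the whole script in a main() function would also work:

if res.status_code != 200:
    raise SystemExit('failed to fetch %s (HTTP %d)' % (url, res.status_code))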