Created
August 16, 2021 15:00
-
-
Save pdavidsonFIA/38cda87d3e5fe496871a471a545677e2 to your computer and use it in GitHub Desktop.
Download EIOPA rfr and store in sqlite database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from datetime import date | |
import zipfile | |
import sqlite3 | |
from sqlite3 import Error | |
import pandas as pd | |
def download_file(url: str, | |
workspace, # url of the file to download | |
filename='' # file path+name to give to the file to download | |
): | |
""" | |
this function downloads a file and give it a name if not explicitly specified. | |
""" | |
if filename: | |
extension = url[(url.rfind(".")):] | |
if extension not in filename: | |
filename = filename + extension | |
else: | |
pass | |
else: | |
# if filename not specified, then the file name will be the original file name | |
filename = url[(url.rfind("/") + 1):] | |
target_file = os.path.join(workspace, filename) | |
if os.path.isfile(target_file): | |
print("file already exists in this location, not downloading") | |
else: | |
urllib.request.urlretrieve(url, target_file) # simpler for file downloading | |
print("file downloaded and saved in the following location: " + target_file) | |
return target_file | |
def eiopa_rfr_link(rep_date): | |
urls = ["https://www.eiopa.europa.eu/tools-and-data/risk-free-interest-rate-term-structures_en", | |
"https://www.eiopa.europa.eu/risk-free-rate-previous-releases-and-preparatory-phase"] | |
reference_date = rep_date.strftime('%Y%m%d') | |
filename = "eiopa_rfr_" + reference_date | |
zip_name = filename + ".zip" | |
valid_links = [] | |
for page in urls: | |
if len(valid_links) == 0: | |
resp = requests.get(page) | |
soup = bs.BeautifulSoup(resp.text, 'lxml') | |
soup2 = soup.find('div', {"class": "group-related-resources"}) | |
links = [] | |
for link in soup2.findAll('a'): | |
links.append(link.get("href")) | |
valid_links = [link for link in links if filename in link] | |
if len(valid_links) >= 1: | |
valid_link = valid_links[0] | |
else: | |
raise FileNotFoundError("failure: data not found for this rep_date: " + reference_date) | |
return valid_link | |
def download_EIOPA_rates(rep_date, workspace): | |
""" Download and unzip the EIOPA files """ | |
url = eiopa_rfr_link(rep_date) | |
zip_file = download_file(url, workspace) | |
reference_date = rep_date.strftime('%Y%m%d') | |
name_excelfile = "EIOPA_RFR_" + reference_date + "_Term_Structures" + ".xlsx" | |
name_excelfile_spreads = "EIOPA_RFR_" + reference_date + "_PD_Cod" + ".xlsx" | |
with zipfile.ZipFile(zip_file) as zipobj: | |
zipobj.extract(name_excelfile,workspace) | |
zipobj.extract(name_excelfile_spreads, workspace) | |
return {'rfr': os.path.join(workspace, name_excelfile), 'spreads': os.path.join(workspace, name_excelfile_spreads)} | |
def extract_spot_rates(rfr_filepath): | |
currency_codes_and_regions = {"EUR" : "Euro", "PLN" : "Poland", "CHF" : "Switzerland", | |
"USD" : "United States", "GBP" : "United Kingdom", "NOK" : "Norway", | |
"SEK" : "Sweden", "DKK" : "Denmark", "HRK" : "Croatia"} | |
currency_dict = dict((v,k) for k,v in currency_codes_and_regions.items()) | |
rates_tables = {} | |
for name, clean_name in {"RFR_spot_no_VA": 'base', "RFR_spot_with_VA": 'va', | |
"Spot_NO_VA_shock_UP": 'up', "Spot_NO_VA_shock_DOWN": 'down', | |
"Spot_WITH_VA_shock_UP": 'va_up', "Spot_WITH_VA_shock_DOWN": 'va_down'}.items(): | |
df = pd.read_excel(rfr_filepath, | |
sheet_name=name, | |
header=1, | |
index_col=1) | |
to_drop = list(filter(lambda x: "Unnamed" in x, list(df.columns))) | |
df = df.drop(to_drop, axis=1) | |
df.loc["VA"].fillna(0, inplace=True) | |
df = df.iloc[8:] | |
df.index.names = ['Duration'] | |
df = df[currency_dict.keys()].rename(columns=currency_dict) | |
rates_tables[clean_name] = df | |
rates_tables = pd.concat(rates_tables).stack().rename('spot') | |
rates_tables.index.names = ['scenario', 'duration', 'currency_code'] | |
rates_tables.index = rates_tables.index.reorder_levels([0,2,1]) | |
rates_tables = rates_tables.sort_index() | |
return rates_tables | |
def extract_spreads(spread_filepath): | |
spreads_dict = [] | |
# spreads_dict["financial fundamental spreads"] = {} | |
currencies = ["EUR", "BGN", "HRK", "CZK", "DKK", "HUF", "LIC", "PLN", "NOK", "RON", "RUB", "SEK", "CHF", | |
"GBP", "AUD", "BRL", "CAD", "CLP", "CNY", "COP", "HKD", "INR", "JPY", "MYR", "MXN", "NZD", | |
"SGD", "ZAR", "KRW", "TWD", "THB", "TRY", "USD"] | |
for name in currencies: | |
df = pd.read_excel(spread_filepath, | |
sheet_name=name, | |
header=1, | |
usecols='W:AC', | |
nrows=30, | |
skiprows=8, | |
names=[0, 1, 2, 3, 4, 5, 6]) | |
df.index = range(1, 31) | |
df['type'] = 'financial' | |
df['currency_code'] = name | |
spreads_dict.append(df) | |
spreads = pd.concat(spreads_dict) | |
spreads.index.rename('Duration', inplace=True) | |
spreads = spreads.reset_index().set_index(['currency_code', 'Duration']).drop(columns='type') | |
spreads = spreads.stack().rename('spread') | |
spreads.index.names = ['currency_code', 'duration', 'cc_step'] | |
# Incrementing cc_step by 1 so can use 0 for risk free rate: | |
spreads = spreads.reset_index('cc_step') | |
spreads.cc_step = spreads.cc_step + 1 | |
spreads = spreads.set_index('cc_step', append=True) | |
return spreads | |
def create_connection(db_file): | |
""" create a database connection to the SQLite database | |
specified by db_file | |
:param db_file: database file | |
:return: Connection object or None | |
""" | |
conn = None | |
try: | |
conn = sqlite3.connect(db_file) | |
return conn | |
except Error as e: | |
print(e) | |
return conn | |
def exec_sql(conn, sql): | |
""" Execute sql in connection """ | |
try: | |
c = conn.cursor() | |
c.execute(sql) | |
except Error as e: | |
print(e) | |
def create_eiopa_db(database=r"eiopa.db"): | |
annual_rfr = """ CREATE TABLE IF NOT EXISTS rfr_raw ( | |
rep_date TEXT, | |
scenario TEXT, | |
currency_code TEXT, | |
duration INTEGER, | |
spot REAL | |
); """ | |
annual_spreads = """CREATE TABLE IF NOT EXISTS spreads_raw ( | |
rep_date TEXT, | |
currency_code TEXT, | |
duration INTEGER, | |
cc_step INTEGER, | |
spread REAL | |
);""" | |
# create a database connection | |
conn = create_connection(database) | |
# create tables | |
if conn is not None: | |
# create projects table | |
exec_sql(conn, annual_rfr) | |
# create tasks table | |
exec_sql(conn, annual_spreads) | |
else: | |
print("Error! cannot create the database connection.") | |
workspace = r'C:\Users\peter\Desktop\rfr' | |
database = os.path.join(workspace, "eiopa.db") | |
create_eiopa_db(database) | |
proj_range = pd.date_range( | |
start=date(2016, 1, 31), | |
end=date(2021, 7, 31), | |
freq='M') | |
for rep_date in proj_range: | |
files = download_EIOPA_rates(rep_date, workspace) | |
rfr = extract_spot_rates(files['rfr']) | |
spreads = extract_spreads(files['spreads']) | |
rx = rfr.reset_index() | |
rx['rep_date'] = rep_date.strftime('%Y-%m-%d') | |
rx.to_sql('rfr_raw', con=create_connection(database), if_exists='append', index=False) | |
sx = spreads.reset_index() | |
sx['rep_date'] = rep_date.strftime('%Y-%m-%d') | |
sx.to_sql('spreads_raw', con=create_connection(database), if_exists='append', index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment