@pdavidsonFIA
Created August 16, 2021 15:00
Download the EIOPA RFR term structures and store them in a SQLite database
import os
from datetime import date
import zipfile
import sqlite3
from sqlite3 import Error
import urllib.request

import requests
import bs4 as bs
import pandas as pd

def download_file(url: str,      # url of the file to download
                  workspace,     # directory in which to store the file
                  filename=''    # file name to give to the downloaded file
                  ):
    """
    Download a file and give it a name if one is not explicitly specified.
    """
    if filename:
        extension = url[(url.rfind(".")):]
        if extension not in filename:
            filename = filename + extension
    else:
        # if filename is not specified, keep the original file name
        filename = url[(url.rfind("/") + 1):]
    target_file = os.path.join(workspace, filename)
    if os.path.isfile(target_file):
        print("file already exists in this location, not downloading")
    else:
        urllib.request.urlretrieve(url, target_file)  # simpler for file downloading
        print("file downloaded and saved in the following location: " + target_file)
    return target_file

def eiopa_rfr_link(rep_date):
    """ Find the download link for the EIOPA RFR zip file for a given reporting date """
    urls = ["https://www.eiopa.europa.eu/tools-and-data/risk-free-interest-rate-term-structures_en",
            "https://www.eiopa.europa.eu/risk-free-rate-previous-releases-and-preparatory-phase"]
    reference_date = rep_date.strftime('%Y%m%d')
    filename = "eiopa_rfr_" + reference_date
    valid_links = []
    for page in urls:
        if len(valid_links) == 0:
            resp = requests.get(page)
            soup = bs.BeautifulSoup(resp.text, 'lxml')
            soup2 = soup.find('div', {"class": "group-related-resources"})
            links = []
            for link in soup2.findAll('a'):
                links.append(link.get("href"))
            valid_links = [link for link in links if filename in link]
    if len(valid_links) >= 1:
        valid_link = valid_links[0]
    else:
        raise FileNotFoundError("failure: data not found for this rep_date: " + reference_date)
    return valid_link

def download_EIOPA_rates(rep_date, workspace):
    """ Download and unzip the EIOPA files """
    url = eiopa_rfr_link(rep_date)
    zip_file = download_file(url, workspace)
    reference_date = rep_date.strftime('%Y%m%d')
    name_excelfile = "EIOPA_RFR_" + reference_date + "_Term_Structures" + ".xlsx"
    name_excelfile_spreads = "EIOPA_RFR_" + reference_date + "_PD_Cod" + ".xlsx"
    with zipfile.ZipFile(zip_file) as zipobj:
        zipobj.extract(name_excelfile, workspace)
        zipobj.extract(name_excelfile_spreads, workspace)
    return {'rfr': os.path.join(workspace, name_excelfile),
            'spreads': os.path.join(workspace, name_excelfile_spreads)}

def extract_spot_rates(rfr_filepath):
    """ Read the spot-rate sheets from the EIOPA term-structure workbook into a single Series """
    currency_codes_and_regions = {"EUR": "Euro", "PLN": "Poland", "CHF": "Switzerland",
                                  "USD": "United States", "GBP": "United Kingdom", "NOK": "Norway",
                                  "SEK": "Sweden", "DKK": "Denmark", "HRK": "Croatia"}
    currency_dict = dict((v, k) for k, v in currency_codes_and_regions.items())
    rates_tables = {}
    for name, clean_name in {"RFR_spot_no_VA": 'base', "RFR_spot_with_VA": 'va',
                             "Spot_NO_VA_shock_UP": 'up', "Spot_NO_VA_shock_DOWN": 'down',
                             "Spot_WITH_VA_shock_UP": 'va_up', "Spot_WITH_VA_shock_DOWN": 'va_down'}.items():
        df = pd.read_excel(rfr_filepath,
                           sheet_name=name,
                           header=1,
                           index_col=1)
        # drop the unnamed helper columns
        to_drop = list(filter(lambda x: "Unnamed" in x, list(df.columns)))
        df = df.drop(to_drop, axis=1)
        # assign back instead of fillna(inplace=True) on a slice, which would only modify a copy
        df.loc["VA"] = df.loc["VA"].fillna(0)
        # the spot rates start after the 8 metadata rows
        df = df.iloc[8:]
        df.index.names = ['Duration']
        df = df[currency_dict.keys()].rename(columns=currency_dict)
        rates_tables[clean_name] = df
    rates_tables = pd.concat(rates_tables).stack().rename('spot')
    rates_tables.index.names = ['scenario', 'duration', 'currency_code']
    rates_tables.index = rates_tables.index.reorder_levels([0, 2, 1])
    rates_tables = rates_tables.sort_index()
    return rates_tables

def extract_spreads(spread_filepath):
    """ Read the financial fundamental spreads per currency from the PD_Cod workbook """
    spreads_list = []
    currencies = ["EUR", "BGN", "HRK", "CZK", "DKK", "HUF", "LIC", "PLN", "NOK", "RON", "RUB", "SEK", "CHF",
                  "GBP", "AUD", "BRL", "CAD", "CLP", "CNY", "COP", "HKD", "INR", "JPY", "MYR", "MXN", "NZD",
                  "SGD", "ZAR", "KRW", "TWD", "THB", "TRY", "USD"]
    for name in currencies:
        df = pd.read_excel(spread_filepath,
                           sheet_name=name,
                           header=1,
                           usecols='W:AC',
                           nrows=30,
                           skiprows=8,
                           names=[0, 1, 2, 3, 4, 5, 6])
        df.index = range(1, 31)
        df['type'] = 'financial'
        df['currency_code'] = name
        spreads_list.append(df)
    spreads = pd.concat(spreads_list)
    spreads.index.rename('Duration', inplace=True)
    spreads = spreads.reset_index().set_index(['currency_code', 'Duration']).drop(columns='type')
    spreads = spreads.stack().rename('spread')
    spreads.index.names = ['currency_code', 'duration', 'cc_step']
    # Increment cc_step by 1 so that 0 can be used for the risk-free rate:
    spreads = spreads.reset_index('cc_step')
    spreads.cc_step = spreads.cc_step + 1
    spreads = spreads.set_index('cc_step', append=True)
    return spreads

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn


def exec_sql(conn, sql):
    """ Execute sql in connection """
    try:
        c = conn.cursor()
        c.execute(sql)
    except Error as e:
        print(e)

def create_eiopa_db(database=r"eiopa.db"):
annual_rfr = """ CREATE TABLE IF NOT EXISTS rfr_raw (
rep_date TEXT,
scenario TEXT,
currency_code TEXT,
duration INTEGER,
spot REAL
); """
annual_spreads = """CREATE TABLE IF NOT EXISTS spreads_raw (
rep_date TEXT,
currency_code TEXT,
duration INTEGER,
cc_step INTEGER,
spread REAL
);"""
# create a database connection
conn = create_connection(database)
# create tables
if conn is not None:
# create projects table
exec_sql(conn, annual_rfr)
# create tasks table
exec_sql(conn, annual_spreads)
else:
print("Error! cannot create the database connection.")
workspace = r'C:\Users\peter\Desktop\rfr'
database = os.path.join(workspace, "eiopa.db")
create_eiopa_db(database)

# month-end reporting dates to download and load
proj_range = pd.date_range(
    start=date(2016, 1, 31),
    end=date(2021, 7, 31),
    freq='M')

# reuse a single connection for all inserts and close it when done
conn = create_connection(database)
for rep_date in proj_range:
    files = download_EIOPA_rates(rep_date, workspace)
    rfr = extract_spot_rates(files['rfr'])
    spreads = extract_spreads(files['spreads'])
    rx = rfr.reset_index()
    rx['rep_date'] = rep_date.strftime('%Y-%m-%d')
    rx.to_sql('rfr_raw', con=conn, if_exists='append', index=False)
    sx = spreads.reset_index()
    sx['rep_date'] = rep_date.strftime('%Y-%m-%d')
    sx.to_sql('spreads_raw', con=conn, if_exists='append', index=False)
conn.close()
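
# Example check (a sketch, not part of the original gist): read one curve back from
# the database to confirm the load worked. It only uses the rfr_raw table and column
# names created by create_eiopa_db above; the chosen rep_date/currency are illustrative.
check_conn = create_connection(database)
check_query = """
    SELECT rep_date, duration, spot
    FROM rfr_raw
    WHERE scenario = 'base' AND currency_code = 'EUR' AND rep_date = '2021-07-31'
    ORDER BY duration
"""
eur_base_curve = pd.read_sql_query(check_query, check_conn)
print(eur_base_curve.head())
check_conn.close()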