# Source: GitHub Gist Vido/6c6d43749174ebfcb54b5986fc78ed00 by @Vido,
# created September 26, 2021 23:25.
import time
import yfinance
import dryscrape
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
def get_market_data(tickers, start, end):
    """Download daily closing prices for *tickers* between *start* and *end*.

    Args:
        tickers: Ticker symbol(s), passed through to ``yfinance.download``.
        start: Start of the download window, passed through as ``start``.
        end: End of the download window, passed through as ``end``.

    Returns:
        The ``'Close'`` selection of the downloaded data with missing values
        back-filled first and forward-filled afterwards.
    """
    market_data = yfinance.download(
        tickers=tickers,
        start=start,
        end=end,
        interval='1d',
        # The keyword is ``threads``; the previous spelling ``treads`` never
        # reached yfinance's threading option.
        threads=False,
    )
    # Fills missing values. Some stocks are recent IPOs, so the leading gap
    # is back-filled and any remaining holes are forward-filled.
    # ``bfill()``/``ffill()`` are the non-deprecated spelling of
    # ``fillna(method='backfill')`` / ``fillna(method='ffill')``.
    return market_data['Close'].bfill().ffill()
def plot_pca_vs_ibov(log_ts, idx):
    """Plot the cumulative performance of the PCA portfolio against IBOV.

    Args:
        log_ts: Return series of the PCA portfolio; expected to hold
            log-returns, since values are summed and then exponentiated.
        idx: Return series of the index, in the same form as ``log_ts``.

    Returns:
        None. Opens a matplotlib window via ``plt.show()``.
    """
    # ``axis`` is passed by keyword: pandas >= 2.0 no longer accepts it
    # as a positional argument of ``concat``.
    rs_df = pd.concat([log_ts, idx], axis=1)
    rs_df.columns = ["PCA Portfolio", "IBOV"]
    # exp(cumulative sum) of the two columns gives their compounded growth.
    crs_df = rs_df.cumsum().apply(np.exp)
    crs_df.plot()
    plt.show()
def plot_pca_vs_ibov_l10(idx, pcal10, ibovl10):
    """Plot IBOV against the two ten-name portfolios.

    Args:
        idx: Return series of the index; expected to hold log-returns,
            since values are summed and then exponentiated.
        pcal10: Return series of the ten-name PCA portfolio.
        ibovl10: Return series of the ten-name IBOV portfolio.

    Returns:
        None. Opens a matplotlib window via ``plt.show()``.
    """
    # ``axis`` is passed by keyword: pandas >= 2.0 no longer accepts it
    # as a positional argument of ``concat``.
    rs_df = pd.concat([idx, pcal10, ibovl10], axis=1)
    rs_df.columns = ["IBOV", "PCA-L10", "IBOV-L10"]
    # exp(cumulative sum) of each column gives its compounded growth.
    crs_df = rs_df.cumsum().apply(np.exp)
    crs_df.plot()
    plt.show()
# Date window shared by both downloads.
START_DATE = '2020-09-23'
END_DATE = '2021-09-23'

# Loads Ibovespa tickers from file.
# NOTE(review): read_pickle unpickles arbitrary objects; only load a
# 'composition.df' that comes from a trusted source.
composition = pd.read_pickle('composition.df')
w_ibov = composition['Part. (%)']

# Get market data
tickers = composition.index.tolist()
data_ts = get_market_data(tickers, START_DATE, END_DATE)
ibov_ts = get_market_data(['^BVSP'], START_DATE, END_DATE)

# Get daily log-returns. The first row of diff() has no predecessor and is
# NaN, so it is dropped.
log_data = data_ts.apply(np.log).diff(1).iloc[1:]
log_ibov = ibov_ts.apply(np.log).diff(1).iloc[1:]

# Plot market returns
acc = log_data.cumsum().apply(np.exp)
acc.plot()
plt.show()

# PCA 1: loadings of the first principal component, one per ticker.
pca = PCA(1).fit(log_data)
pc1 = pd.Series(index=log_data.columns, data=pca.components_[0])
# Absolute loadings rescaled to sum to 1, used as portfolio weights.
weights = pc1.abs() / pc1.abs().sum()

# Compare Ibov vs PCA Portfolio
print(weights.nlargest(10))
print(weights.nsmallest(10))
print(w_ibov.nlargest(10))
print(w_ibov.nsmallest(10))

# Weighted daily return of the full PCA portfolio.
pca_ts = (log_data * weights).sum(axis=1)

# Equal-weighted basket of the ten largest PCA weights. The selection uses
# `weights` (absolute loadings) rather than the signed `pc1`: the sign of a
# principal component is arbitrary, so ranking signed loadings could pick
# the least-loaded names. This also matches the ranking printed above.
pcal10 = log_data[weights.nlargest(10).index].mean(axis=1)

# Ten largest Ibovespa constituents, reweighted to sum to 1.
ibov_top10 = w_ibov.nlargest(10)
il10 = ibov_top10 / ibov_top10.sum()
ibovl10 = (log_data[il10.index] * il10).sum(axis=1)

# Index log-returns as a Series on their own (already differenced) dates.
# NOTE(review): depending on the yfinance version, get_market_data may hand
# back a one-column DataFrame for a single ticker; squeeze it if so.
if isinstance(log_ibov, pd.DataFrame):
    idx = log_ibov.squeeze("columns")
else:
    idx = log_ibov

# Plot Ibov vs PCA Portfolio
plot_pca_vs_ibov(pca_ts, idx)
plot_pca_vs_ibov_l10(idx, pcal10, ibovl10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment