### first let's build two strats, one long only, one EWMAC
import matplotlib

matplotlib.use("TkAgg")
matplotlib.rcParams.update({"font.size": 22})

import numpy as np
import pandas as pd
from copy import copy
from random import uniform
from scipy.optimize import minimize

from sysquant.estimators.correlations import (
    correlationEstimate,
    create_boring_corr_matrix,
)
from sysquant.estimators.diversification_multipliers import (
    diversification_mult_single_period,
)
from sysquant.optimisation.weights import portfolioWeights
from syscore.interactive.progress_bar import progressBar
from systems.provided.basic.system import basic_db_futures_system
from systems.trading_rules import TradingRule
from systems.provided.rules.ewmac import ewmac
from private.projects.futures_book.generate_instrument_list import (
    MASTER_INSTRUMENT_LIST,
)
## simple examples: calculate the IDM and independent bets by hand
corr = correlationEstimate(
    np.array([[1, 0.5, 0.3], [0.5, 1, 0.1], [0.3, 0.1, 1]]), columns=["a", "b", "c"]
)
corr.average_corr()

weights = portfolioWeights(dict(a=0.45, b=0.45, c=0.1))

## independent bets: the squared IDM (squared, matching the helper below)
diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 2
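
## quick sanity check by hand, assuming the IDM is 1 / sqrt(w'Cw)
## (a sketch of the assumed formula, not a call into pysystemtrade):
w = np.array([0.45, 0.45, 0.1])
manual_idm = 1.0 / np.sqrt(w.dot(corr.values).dot(w))
manual_idm ** 2  ## number of independent bets; should match the line above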


def get_independent_bets_for_length_n_and_avg_corr(n: int, avg_corr: float):
    ## equal weights over an n x n matrix with avg_corr everywhere off the diagonal
    corr = create_boring_corr_matrix(size=n, offdiag=avg_corr, columns=list(range(n)))
    weights = portfolioWeights.from_weights_and_keys(
        list_of_weights=[1.0 / n] * n, list_of_keys=corr.list_of_keys()
    )

    return diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 2
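
## worked example: with equal weights w'Cw = (n + n(n-1) * rho) / n^2, so ten
## assets at average correlation 0.5 give 1 / 0.55, about 1.8 independent bets
get_independent_bets_for_length_n_and_avg_corr(10, 0.5)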


def long_bias(price):
    """
    :param price: The price or other series to use (assumed Tx1)
    :type price: pd.DataFrame

    :returns: pd.Series -- unscaled, uncapped forecast
    """
    forecast = 10.0
    forecast_ts = copy(price)
    forecast_ts[:] = forecast

    return forecast_ts
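
## a constant forecast of +10 (the expected absolute forecast under the usual
## scaling convention) keeps this rule permanently long at average size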

## first the long only system: a constant long bias is effectively a very slow trend rule
long_only_system = basic_db_futures_system(
    trading_rules=dict(
        long_only=TradingRule(dict(function=long_bias, data=["data.daily_prices"])),
    ),
)
long_only_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
long_only_system.config.base_currency = "USD"
long_only_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
all_curves = dict(
    [
        (
            instrument_code,
            long_only_system.accounts.pandl_for_subsystem(instrument_code).percent,
        )
        for instrument_code in long_only_system.get_instrument_list()
    ]
)

## and the correlation matrix for subsystem returns
stacked_curves = pd.concat(all_curves, axis=1)
stacked_curves.columns = long_only_system.get_instrument_list()
long_only_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation of subsystem returns is 0.0904:
long_only_corr_matrix.average_corr()

## for the underlying instrument returns it is 0.224
long_only_system.portfolio.get_correlation_matrix().average_corr()


def get_SR_and_IDM_for_a_random_set_of_n(
    n: int,
    final_corr_matrix,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    ## pick some instruments
    instrument_list = list(final_corr_matrix.columns)
    subset_of_instruments = pick_a_subset_of_instruments(n, instrument_list)
    subset_corr_matrix = final_corr_matrix.subset(subset_of_instruments)

    sr, idm = get_SR_and_IDM_for_subset_of_instruments(
        subset_of_instruments,
        subset_corr_matrix=subset_corr_matrix,
        all_curves=all_curves,
        use_equal_weights=use_equal_weights,
        last_ten_years=last_ten_years,
    )

    return sr, idm


def pick_a_subset_of_instruments(n, instrument_list: list) -> list:
    remaining_instrument_list = copy(instrument_list)
    chosen_instruments = []
    for __ in range(n):
        next_instrument = pick_a_random_instrument(remaining_instrument_list)
        remaining_instrument_list.remove(next_instrument)
        chosen_instruments.append(next_instrument)

    return chosen_instruments


def pick_a_random_instrument(remaining_instrument_list) -> str:
    length = len(remaining_instrument_list)
    idx = random_int_between_0_and_nmax(length - 1)

    return remaining_instrument_list[idx]


def random_int_between_0_and_nmax(nmax) -> int:
    ## a uniform draw rounded to the nearest integer, giving each of
    ## 0..nmax roughly equal probability
    return int(np.round(uniform(-0.49, nmax + 0.49)))
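
## an essentially equivalent draw using numpy's own RNG (sketch only):
## def random_int_between_0_and_nmax(nmax) -> int:
##     return np.random.randint(0, nmax + 1)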


def get_SR_and_IDM_for_subset_of_instruments(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    instrument_weights = get_instrument_weights_for_subset(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )
    sr = get_SR_for_subset_given_instrument_weights(
        instrument_weights=instrument_weights,
        all_curves=all_curves,
        subset_of_instruments=subset_of_instruments,
        last_ten_years=last_ten_years,
    )
    ## dm_max set very high so the IDM is effectively uncapped
    idm = diversification_mult_single_period(
        corrmatrix=subset_corr_matrix, weights=instrument_weights, dm_max=99999
    )

    return sr, idm


def get_instrument_weights_for_subset(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    use_equal_weights=False,
) -> portfolioWeights:
    weights_as_list = get_instrument_weights_for_subset_as_list(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )

    return portfolioWeights.from_weights_and_keys(
        list_of_weights=weights_as_list, list_of_keys=subset_of_instruments
    )


def get_instrument_weights_for_subset_as_list(
    subset_of_instruments: list, subset_corr_matrix, use_equal_weights=True
) -> list:
    instrument_count = len(subset_of_instruments)
    if use_equal_weights or instrument_count < 3:
        return [1.0 / instrument_count] * instrument_count

    subset_corr_floored = subset_corr_matrix.floor_correlation_matrix(0)
    subset_corr_shrunk = subset_corr_floored.shrink_to_average(0.5)

    ## with equal vol across assets we can treat the correlation matrix as a covariance matrix
    weights = optimise_with_sigma(subset_corr_shrunk.values)

    return weights


def optimise_with_sigma(sigma: np.ndarray):
    ## minimum variance optimisation: long-only weights summing to one
    number_assets = sigma.shape[1]

    # Starting weights, equal weighting
    start_weights = [1.0 / number_assets] * number_assets

    # Set up constraints - positive weights, adding to 1.0
    bounds = [(0.0, 1.0)] * number_assets
    cdict = [{"type": "eq", "fun": addem}]
    ans = minimize(
        variance,
        start_weights,
        (sigma,),
        method="SLSQP",
        bounds=bounds,
        constraints=cdict,
        tol=0.001,
    )
    weights = ans["x"]

    return weights


def variance(weights, sigma):
    # returns the variance (NOT standard deviation) given weights and sigma
    return weights.dot(sigma.dot(weights.transpose()))


def addem(weights):
    # Used for constraints, weights must sum to 1
    return 1.0 - sum(weights)
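
## quick check on the toy matrix from earlier: with equal vols, minimum variance
## should overweight 'c', the least correlated asset (illustrative call only)
optimise_with_sigma(corr.values)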


def get_SR_for_subset_given_instrument_weights(
    instrument_weights: portfolioWeights,
    all_curves: dict,
    subset_of_instruments: list,
    last_ten_years=False,
):
    df_of_curves = pd.DataFrame(
        dict(
            [
                (instrument_code, all_curves[instrument_code])
                for instrument_code in subset_of_instruments
            ]
        )
    )
    weights_as_df = pd.concat(
        [
            pd.Series(weight, index=df_of_curves.index)
            for weight in instrument_weights.values()
        ],
        axis=1,
    )
    weights_as_df.columns = list(instrument_weights.keys())
    weighted_returns = df_of_curves * weights_as_df
    portfolio_returns = weighted_returns.sum(axis=1)
    if last_ten_years:
        ## roughly ten years of daily returns at 250 business days a year
        portfolio_returns = portfolio_returns[-2500:]
    portfolio_returns_mean = portfolio_returns.mean()
    portfolio_returns_stdev = portfolio_returns.std()

    ## 16 is approximately sqrt(256), annualising a daily Sharpe ratio
    return 16 * portfolio_returns_mean / portfolio_returns_stdev


range_to_do = list(range(1, 5)) + list(
    range(5, len(long_only_system.get_instrument_list()), 5)
)
max_monte_count = 1000


def effective_monte_count(n, max_monte_count):
    ## do fewer monte carlo runs for larger n, since each run is slower and
    ## the sampling variation across random subsets is smaller
    return int(max_monte_count / (n**0.5))
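
## e.g. with max_monte_count=1000: n=1 gives 1000 runs, n=25 gives 200 runs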


def get_sr_and_idm_for_various_n(
    corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
):
    sharpe_ratio_results = dict()
    idm_results = dict()
    p = progressBar(len(range_to_do))
    for n in range_to_do:
        sr_this_n = []
        idm_this_n = []
        ## do the maximum number of runs for small n, then scale down as n grows
        use_monte_count = effective_monte_count(n, max_monte_count)
        for __ in range(use_monte_count):
            sr, idm = get_SR_and_IDM_for_a_random_set_of_n(
                n=n,
                final_corr_matrix=corr_matrix,
                all_curves=all_curves,
                use_equal_weights=use_equal_weights,
                last_ten_years=last_ten_years,
            )
            sr_this_n.append(sr)
            idm_this_n.append(idm)
        sharpe_ratio_results[n] = sr_this_n
        idm_results[n] = idm_this_n
        p.iterate()
    p.close()

    return sharpe_ratio_results, idm_results


def plot_dict_as_df(
    dict_of_results, plot_points=True, divide_by_first_mean=False, square_results=False
):
    # note: square_results is currently unused
    results_df = return_df_padded_from_dict(dict_of_results)
    medians = results_df.median(axis=1)
    if divide_by_first_mean:
        ## despite the name, normalise by the median for the smallest n
        first_median = medians.values[0]
        results_df = results_df / first_median
        medians = medians / first_median
    if plot_points:
        results_df.plot(marker=".", linestyle="none", legend=False)
    medians.plot()


def return_df_padded_from_dict(dict_of_results: dict) -> pd.DataFrame:
    pad_to = np.max([len(x) for x in dict_of_results.values()])
    padded_dict = dict(
        [
            (key, pad_list(some_list, pad_to=pad_to))
            for key, some_list in dict_of_results.items()
        ]
    )
    results_df = pd.DataFrame(padded_dict)

    return results_df.transpose()


def pad_list(some_list, pad_to: int = 1000):
    missing_len = pad_to - len(some_list)
    padding = [np.nan] * missing_len

    return some_list + padding
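
## e.g. pad_list([0.3, 0.5], pad_to=4) returns [0.3, 0.5, nan, nan], so every
## row of the transposed frame has the same length regardless of monte count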


## now the EWMAC system, a 16/64 day crossover
ewmac_system = basic_db_futures_system(
    trading_rules=dict(
        ewmac=TradingRule(
            dict(
                function=ewmac,
                data=["data.daily_prices", "rawdata.daily_returns_volatility"],
                other_args=dict(Lfast=16, Lslow=64),
            )
        ),
    ),
)
ewmac_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
ewmac_system.config.base_currency = "USD"
# forecast scalar for ewmac16/64
ewmac_system.config.forecast_scalars = dict(ewmac=3.75)
ewmac_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
ewmac_all_curves = dict(
    [
        (
            instrument_code,
            ewmac_system.accounts.pandl_for_subsystem(instrument_code).percent,
        )
        for instrument_code in ewmac_system.get_instrument_list()
    ]
)

## and the correlation matrix
stacked_curves = pd.concat(ewmac_all_curves, axis=1)
stacked_curves.columns = ewmac_system.get_instrument_list()
ewmac_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation is 0.0813:
ewmac_corr_matrix.average_corr()

long_sr, long_idm = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=False,
)
long_sr_ew, long_idm_ew = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=True,
)
ewmac_sr, ewmac_idm = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=False,
)
ewmac_sr_ew, ewmac_idm_ew = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=True,
)

## this sort of thing: median SR, normalised by the value for a single instrument
plot_dict_as_df(ewmac_sr, divide_by_first_mean=True, plot_points=False)
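
## the other result sets plot the same way, e.g. (illustrative calls):
plot_dict_as_df(ewmac_idm, plot_points=False)
plot_dict_as_df(long_sr, divide_by_first_mean=True, plot_points=False)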