Skip to content

Instantly share code, notes, and snippets.

@robcarver17
Created February 28, 2023 16:36
Embed
What would you like to do?
### first let's build two strats, one long only, one EWMAC
import matplotlib
matplotlib.use("TkAgg")
matplotlib.rcParams.update({"font.size": 22})
import numpy as np
import pandas as pd
from random import uniform
from sysquant.estimators.correlations import (
correlationEstimate,
create_boring_corr_matrix,
)
from sysquant.optimisation.weights import portfolioWeights
from systems.provided.basic.system import basic_db_futures_system
from systems.trading_rules import TradingRule
from systems.provided.rules.ewmac import ewmac
from private.projects.futures_book.generate_instrument_list import (
MASTER_INSTRUMENT_LIST,
)
from copy import copy
from sysquant.estimators.diversification_multipliers import (
diversification_mult_single_period,
)
## Simple worked example: 3-asset correlation matrix plus hand-picked weights
corr = correlationEstimate(
    np.array([[1, 0.5, 0.3], [0.5, 1, 0.1], [0.3, 0.1, 1]]), columns=["a", "b", "c"]
)
# average off-diagonal correlation (result displayed interactively, not stored)
corr.average_corr()
weights = portfolioWeights(dict(a=0.45, b=0.45, c=0.1))
## independent bets
# NOTE(review): IDM cubed here, whereas the helper function below squares it --
# presumably exploratory; verify which power was intended
diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 3
def get_independent_bets_for_length_n_and_avg_corr(n: int, avg_corr: float):
    """
    Effective number of independent bets for n equally weighted assets that
    all share the same average pairwise correlation.

    :param n: number of assets
    :param avg_corr: common off-diagonal correlation
    :returns: squared diversification multiplier
    """
    uniform_corr = create_boring_corr_matrix(
        size=n, offdiag=avg_corr, columns=list(range(n))
    )
    equal_weights = portfolioWeights.from_weights_and_keys(
        list_of_keys=uniform_corr.list_of_keys(),
        list_of_weights=[1.0 / n] * n,
    )
    idm = diversification_mult_single_period(
        corrmatrix=uniform_corr, weights=equal_weights
    )
    return idm ** 2
def long_bias(price):
    """
    Constant 'always long' forecast of +10, aligned to the index of price.

    :param price: the price or other series to use (assumed Tx1); only its
        index/shape matters, the values are discarded
    :returns: series shaped like price -- unscaled, uncapped forecast
    """
    constant_forecast = 10.0
    # copy so the caller's price series is left untouched
    forecast_series = copy(price)
    forecast_series[:] = constant_forecast
    return forecast_series
## first system: long only (in effect a very slow trend follower)
long_only_system = basic_db_futures_system(
    trading_rules=dict(
        long_only=TradingRule(dict(function=long_bias, data=["data.daily_prices"])),
    ),
)
# restrict the traded universe to the master instrument list
long_only_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
long_only_system.config.base_currency = "USD"
# large notional capital (USD 100m) -- presumably so position rounding does not
# distort subsystem returns; TODO confirm
long_only_system.config.notional_trading_capital = 100e6
## Want to store account curves for each subsystem:
## instrument code -> daily percentage returns series
all_curves = dict(
    [
        (
            instrument_code,
            long_only_system.accounts.pandl_for_subsystem(instrument_code).percent,
        )
        for instrument_code in long_only_system.get_instrument_list()
    ]
)
## and the correlation matrix for subsystem returns
stacked_curves = pd.concat(all_curves, axis=1)
stacked_curves.columns = long_only_system.get_instrument_list()
long_only_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())
## average correlation is 0.0904:
long_only_corr_matrix.average_corr()
## for instrument returns is 0.224
long_only_system.portfolio.get_correlation_matrix().average_corr()
def get_SR_and_IDM_for_a_random_set_of_n(
    n: int,
    final_corr_matrix,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Draw a random subset of n instruments and return its Sharpe ratio and
    diversification multiplier.

    :param n: portfolio size to sample
    :param final_corr_matrix: correlation matrix for the full universe
    :param all_curves: instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: measure the Sharpe on the final ~10 years only
    :returns: (sharpe_ratio, idm)
    """
    candidate_instruments = list(final_corr_matrix.columns)
    chosen_instruments = pick_a_subset_of_instruments(n, candidate_instruments)
    corr_for_chosen = final_corr_matrix.subset(chosen_instruments)

    return get_SR_and_IDM_for_subset_of_instruments(
        chosen_instruments,
        subset_corr_matrix=corr_for_chosen,
        all_curves=all_curves,
        use_equal_weights=use_equal_weights,
        last_ten_years=last_ten_years,
    )
def pick_a_subset_of_instruments(n, instrument_list: list) -> list:
    """
    Randomly choose n distinct instruments from instrument_list.

    :param n: number of instruments to draw (0 <= n <= len(instrument_list))
    :param instrument_list: candidate instrument codes; not mutated
    :returns: list of n distinct instrument codes
    """
    import random

    # random.sample draws without replacement and leaves the input untouched,
    # replacing the previous hand-rolled pick-and-remove loop (whose index
    # draw via uniform()-and-round slightly under-weighted the endpoints)
    return random.sample(instrument_list, n)
def pick_a_random_instrument(remaining_instrument_list) -> str:
    """
    Return one element of the list, chosen uniformly at random.

    :param remaining_instrument_list: non-empty list of instrument codes
    :returns: a single instrument code
    """
    import random

    # random.choice is the stdlib idiom for this; the previous manual
    # index draw duplicated it with a slightly biased rounding scheme
    return random.choice(remaining_instrument_list)
def random_int_between_0_and_nmax(nmax) -> int:
    """
    Return a uniform random integer in [0, nmax] inclusive.

    :param nmax: largest value that may be returned (>= 0)
    :returns: int drawn uniformly from 0..nmax
    """
    import random

    # random.randint is exactly uniform; the previous
    # int(np.round(uniform(-0.49, nmax + 0.49))) gave the two endpoint
    # values slightly less probability mass (interval width 0.99 vs 1.0)
    return random.randint(0, nmax)
def get_SR_and_IDM_for_subset_of_instruments(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Sharpe ratio and diversification multiplier for a fixed instrument subset.

    :param subset_of_instruments: instrument codes to include
    :param subset_corr_matrix: correlation matrix restricted to those codes
    :param all_curves: instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: measure the Sharpe on the final ~10 years only
    :returns: (sharpe_ratio, idm)
    """
    instrument_weights = get_instrument_weights_for_subset(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )
    sr = get_SR_for_subset_given_instrument_weights(
        instrument_weights=instrument_weights,
        all_curves=all_curves,
        subset_of_instruments=subset_of_instruments,
        last_ten_years=last_ten_years,
    )
    # huge dm_max effectively disables the usual cap on the multiplier, so we
    # observe the raw diversification effect however large it gets
    idm = diversification_mult_single_period(
        corrmatrix=subset_corr_matrix, weights=instrument_weights, dm_max=99999
    )
    return sr, idm
def get_instrument_weights_for_subset(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    use_equal_weights=False,
) -> portfolioWeights:
    """
    Portfolio weights for the subset, keyed by instrument code.

    :param subset_of_instruments: instrument codes, in weight order
    :param subset_corr_matrix: correlation matrix restricted to those codes
    :param use_equal_weights: equal weights if True, otherwise optimised
    :returns: portfolioWeights mapping code -> weight
    """
    raw_weights = get_instrument_weights_for_subset_as_list(
        subset_of_instruments,
        subset_corr_matrix,
        use_equal_weights=use_equal_weights,
    )

    return portfolioWeights.from_weights_and_keys(
        list_of_keys=subset_of_instruments, list_of_weights=raw_weights
    )
def get_instrument_weights_for_subset_as_list(
    subset_of_instruments: list, subset_corr_matrix, use_equal_weights=True
) -> list:
    """
    Weights for the subset, in the same order as subset_of_instruments.

    Either equal weights, or minimum-variance weights from the (floored,
    shrunk) correlation matrix.

    NOTE(review): the optimised branch returns a numpy array rather than a
    list, unlike the equal-weight branch -- callers appear to accept both.

    :param subset_of_instruments: instrument codes
    :param subset_corr_matrix: correlation matrix restricted to those codes
    :param use_equal_weights: skip the optimisation entirely if True
    :returns: weights summing to 1.0
    """
    instrument_count = len(subset_of_instruments)
    # for tiny portfolios (fewer than 3) optimisation adds nothing
    if use_equal_weights or instrument_count < 3:
        return [1.0 / instrument_count] * instrument_count
    # floor negative correlations at zero, then shrink halfway to the average,
    # to keep the optimiser from taking extreme positions
    subset_corr_floored = subset_corr_matrix.floor_correlation_matrix(0)
    subset_corr_shrunk = subset_corr_floored.shrink_to_average(0.5)
    ## equal vol can treat correlation as covariance
    weights = optimise_with_sigma(subset_corr_shrunk.values)
    return weights
from scipy.optimize import minimize
def optimise_with_sigma(sigma: np.array):
    """
    Minimum-variance long-only weights summing to one, given covariance sigma.

    (In this script sigma is a correlation matrix, which is equivalent to a
    covariance matrix when all assets have equal volatility.)

    :param sigma: square covariance (or correlation) matrix
    :returns: numpy array of optimal weights
    """
    n_assets = sigma.shape[1]
    equal_weight = 1.0 / n_assets

    def _portfolio_variance(w):
        # w' . sigma . w
        return w.dot(sigma.dot(w.transpose()))

    def _weights_sum_to_one(w):
        # equality constraint: zero when fully invested
        return 1.0 - sum(w)

    solution = minimize(
        _portfolio_variance,
        [equal_weight] * n_assets,  # start from equal weighting
        method="SLSQP",
        bounds=[(0.0, 1.0)] * n_assets,  # long-only
        constraints=[{"type": "eq", "fun": _weights_sum_to_one}],
        tol=0.001,
    )

    return solution["x"]
def variance(weights, sigma):
    """
    Portfolio variance w' . sigma . w (NOT the standard deviation).

    :param weights: numpy array of portfolio weights
    :param sigma: square covariance matrix
    :returns: scalar variance
    """
    sigma_times_w = sigma.dot(weights.transpose())
    return weights.dot(sigma_times_w)
def addem(weights):
    """
    Equality-constraint helper for the optimiser: returns zero exactly when
    the weights sum to 1.0 (fully invested).

    :param weights: iterable of portfolio weights
    :returns: 1.0 minus the sum of the weights
    """
    total_invested = sum(weights)
    return 1.0 - total_invested
import pandas as pd
def get_SR_for_subset_given_instrument_weights(
instrument_weights: portfolioWeights,
all_curves: dict,
subset_of_instruments: list,
last_ten_years=False,
):
df_of_curves = pd.DataFrame(
dict(
[
(instrument_code, all_curves[instrument_code])
for instrument_code in subset_of_instruments
]
)
)
weights_as_df = pd.concat(
[
pd.Series(weight, index=df_of_curves.index)
for weight in instrument_weights.values()
],
axis=1,
)
weights_as_df.columns = list(instrument_weights.keys())
weighted_returns = df_of_curves * weights_as_df
portfolio_returns = weighted_returns.sum(axis=1)
if last_ten_years:
portfolio_returns = portfolio_returns[-2500:]
portfolio_returns_mean = portfolio_returns.mean()
portfolio_returns_stdev = portfolio_returns.std()
return 16 * portfolio_returns_mean / portfolio_returns_stdev
from syscore.interactive.progress_bar import progressBar
## Portfolio sizes to test: 1..4 individually, then every 5th size up to the
## size of the traded universe
range_to_do = list(range(1, 5)) + list(
    range(5, len(long_only_system.get_instrument_list()), 5)
)
# cap on Monte Carlo draws per portfolio size (shrunk for larger n below)
max_monte_count = 1000
def effective_monte_count(n, max_monte_count):
    """
    Number of Monte Carlo draws to run for portfolio size n: the cap scaled
    down by sqrt(n), truncated to an integer.

    :param n: portfolio size (>= 1)
    :param max_monte_count: draw count at n == 1
    :returns: int number of draws
    """
    shrink_factor = n ** 0.5
    return int(max_monte_count / shrink_factor)
def get_sr_and_idm_for_various_n(
    corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
):
    """
    Monte Carlo experiment: for each portfolio size n in range_to_do,
    repeatedly draw a random subset of n instruments and record its Sharpe
    ratio and diversification multiplier.

    NOTE(review): depends on the module-level globals range_to_do and
    max_monte_count being defined before this is called.

    :param corr_matrix: correlation matrix for the full universe
    :param all_curves: instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: measure Sharpe on the final ~10 years only
    :returns: (dict n -> list of Sharpe ratios, dict n -> list of IDMs)
    """
    sharpe_ratio_results = dict()
    idm_results = dict()
    p = progressBar(len(range_to_do))
    for n in range_to_do:
        sr_this_n = []
        idm_this_n = []
        ## do max runs to begin with, then fewer draws for larger n
        use_monte_count = effective_monte_count(n, max_monte_count)
        for __ in range(use_monte_count):
            sr, idm = get_SR_and_IDM_for_a_random_set_of_n(
                n=n,
                final_corr_matrix=corr_matrix,
                all_curves=all_curves,
                use_equal_weights=use_equal_weights,
                last_ten_years=last_ten_years,
            )
            sr_this_n.append(sr)
            idm_this_n.append(idm)
        sharpe_ratio_results[n] = sr_this_n
        idm_results[n] = idm_this_n
        p.iterate()
    p.close()
    return sharpe_ratio_results, idm_results
def plot_dict_as_df(
    dict_of_results, plot_points=True, divide_by_first_mean=False, square_results=False
):
    """
    Plot Monte Carlo results per portfolio size: optionally a scatter of all
    individual draws, plus the median across draws for each n.

    NOTE(review): despite its name, divide_by_first_mean normalises by the
    first MEDIAN, not the mean -- confirm intent.
    NOTE(review): square_results is accepted but never used in this body.

    :param dict_of_results: n -> list of results (as produced above)
    :param plot_points: scatter every individual draw if True
    :param divide_by_first_mean: normalise everything by the first median
    """
    results_df = return_df_padded_from_dict(dict_of_results)
    # one row per n; median taken across all Monte Carlo draws for that n
    medians = results_df.median(axis=1)
    if divide_by_first_mean:
        first_median = medians.values[0]
        results_df = results_df / first_median
        medians = medians / first_median
    if plot_points:
        results_df.plot(marker=".", linestyle="none", legend=False)
    medians.plot()
def return_df_padded_from_dict(dict_of_results: dict) -> pd.DataFrame:
    """
    Convert {key: list of results} into a DataFrame with one row per key,
    NaN-padding shorter lists so every row has the same length.

    :param dict_of_results: mapping key -> list of numbers
    :returns: DataFrame indexed by key, one column per result position
    """
    longest = np.max([len(results) for results in dict_of_results.values()])
    # pad each list on the right with NaN up to the longest length
    padded = {
        key: results + [np.nan] * (longest - len(results))
        for key, results in dict_of_results.items()
    }
    return pd.DataFrame(padded).transpose()
def pad_list(some_list, pad_to: int = 1000):
    """
    Return a copy of some_list right-padded with NaN to length pad_to.

    Lists already at least pad_to long come back unchanged (as a copy).

    :param some_list: list to pad; not mutated
    :param pad_to: desired minimum length
    :returns: new list of length max(len(some_list), pad_to)
    """
    n_missing = max(0, pad_to - len(some_list))
    return some_list + [np.nan] * n_missing
## Second system: EWMAC(16,64) trend following
## (the original comment here said "first long only" -- a copy/paste leftover)
ewmac_system = basic_db_futures_system(
    trading_rules=dict(
        ewmac=TradingRule(
            dict(
                function=ewmac,
                data=["data.daily_prices", "rawdata.daily_returns_volatility"],
                other_args=dict(Lfast=16, Lslow=64),
            )
        ),
    ),
)
# forecast scalars
ewmac_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
ewmac_system.config.base_currency = "USD"
# fixed forecast scalar for the ewmac rule
ewmac_system.config.forecast_scalars = dict(ewmac=3.75)
ewmac_system.config.notional_trading_capital = 100e6
## Want to store account curves for each subsystem:
## instrument code -> daily percentage returns series
ewmac_all_curves = dict(
    [
        (
            instrument_code,
            ewmac_system.accounts.pandl_for_subsystem(instrument_code).percent,
        )
        for instrument_code in ewmac_system.get_instrument_list()
    ]
)
## and the correlation matrix
stacked_curves = pd.concat(ewmac_all_curves, axis=1)
stacked_curves.columns = ewmac_system.get_instrument_list()
ewmac_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())
## average correlation is 0.0813:
ewmac_corr_matrix.average_corr()
## Long-only system, optimised weights, full sample.
# BUGFIX: the original passed last_ten_years=last_ten_years, but no
# module-level name last_ten_years exists at this point, so this line
# raised NameError. Pass the explicit full-sample default instead.
long_sr, long_idm = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=False,
    last_ten_years=False,
)
## Long-only system, equal weights
long_sr_ew, long_idm_ew = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=True,
)
## EWMAC system, optimised weights
ewmac_sr, ewmac_idm = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=False,
)
## EWMAC system, equal weights
ewmac_sr_ew, ewmac_idm_ew = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=True,
)
## this sort of thing: Sharpe vs portfolio size, normalised to the n=1 median
plot_dict_as_df(ewmac_sr, divide_by_first_mean=True, plot_points=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment