Skip to content

Instantly share code, notes, and snippets.

robcarver17/march2023_independent_bets.py Secret

Created February 28, 2023 16:36
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 ### first let's build two strats, one long only, one EWMAC import matplotlib matplotlib.use("TkAgg") matplotlib.rcParams.update({"font.size": 22}) import numpy as np import pandas as pd from random import uniform from sysquant.estimators.correlations import ( correlationEstimate, create_boring_corr_matrix, ) from sysquant.optimisation.weights import portfolioWeights from systems.provided.basic.system import basic_db_futures_system from systems.trading_rules import TradingRule from systems.provided.rules.ewmac import ewmac from private.projects.futures_book.generate_instrument_list import ( MASTER_INSTRUMENT_LIST, ) from copy import copy from sysquant.estimators.diversification_multipliers import ( diversification_mult_single_period, ) ## simple examples calculate corr = correlationEstimate( np.array([[1, 0.5, 0.3], [0.5, 1, 0.1], [0.3, 0.1, 1]]), columns=["a", "b", "c"] ) corr.average_corr() weights = portfolioWeights(dict(a=0.45, b=0.45, c=0.1)) ## independent bets diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 3 def get_independent_bets_for_length_n_and_avg_corr(n: int, avg_corr: float): corr = create_boring_corr_matrix(size=n, offdiag=avg_corr, columns=list(range(n))) weights = portfolioWeights.from_weights_and_keys( list_of_weights=[1.0 / n] * n, list_of_keys=corr.list_of_keys() ) return diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 2 def long_bias(price): """ :param price: The price or other series to use (assumed Tx1) :type price: pd.DataFrame :returns: pd.Series -- unscaled, uncapped forecast """ forecast = 10.0 forecast_ts = copy(price) forecast_ts[:] = forecast return forecast_ts ## first long only, actually very slow trend long_only_system = basic_db_futures_system( trading_rules=dict( long_only=TradingRule(dict(function=long_bias, data=["data.daily_prices"])), ), ) long_only_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST) long_only_system.config.base_currency = "USD" 
long_only_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
all_curves = {
    instrument_code: long_only_system.accounts.pandl_for_subsystem(
        instrument_code
    ).percent
    for instrument_code in long_only_system.get_instrument_list()
}

## and the correlation matrix for subsystem returns
stacked_curves = pd.concat(all_curves, axis=1)
stacked_curves.columns = long_only_system.get_instrument_list()
long_only_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation is 0.0904:
long_only_corr_matrix.average_corr()

## for instrument returns is 0.224
long_only_system.portfolio.get_correlation_matrix().average_corr()


def get_SR_and_IDM_for_a_random_set_of_n(
    n: int,
    final_corr_matrix,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Pick n instruments at random (without replacement) and return the
    Sharpe ratio and IDM of the resulting portfolio.

    :param n: number of instruments to sample
    :param final_corr_matrix: correlationEstimate over all instruments
    :param all_curves: dict of instrument_code -> percent returns series
    :param use_equal_weights: equal weights if True, else optimised weights
    :param last_ten_years: restrict the SR calculation to recent data
    :returns: (sharpe_ratio, idm) tuple
    """
    ## pick some instruments
    instrument_list = list(final_corr_matrix.columns)
    subset_of_instruments = pick_a_subset_of_instruments(n, instrument_list)
    subset_corr_matrix = final_corr_matrix.subset(subset_of_instruments)
    sr, idm = get_SR_and_IDM_for_subset_of_instruments(
        subset_of_instruments,
        subset_corr_matrix=subset_corr_matrix,
        all_curves=all_curves,
        use_equal_weights=use_equal_weights,
        last_ten_years=last_ten_years,
    )

    return sr, idm


def pick_a_subset_of_instruments(n, instrument_list: list) -> list:
    """Sample n distinct instruments from instrument_list at random."""
    remaining_instrument_list = copy(instrument_list)
    chosen_instruments = []
    for __ in range(n):
        next_instrument = pick_a_random_instrument(remaining_instrument_list)
        # remove so the same instrument cannot be chosen twice
        remaining_instrument_list.remove(next_instrument)
        chosen_instruments.append(next_instrument)

    return chosen_instruments


def pick_a_random_instrument(remaining_instrument_list) -> str:
    """Return one element of the list chosen uniformly at random."""
    length = len(remaining_instrument_list)
    idx = random_int_between_0_and_nmax(length - 1)
    return remaining_instrument_list[idx]


def random_int_between_0_and_nmax(nmax) -> int:
    """
    Uniform random integer in [0, nmax] inclusive.

    FIX(review): the original int(np.round(uniform(-0.49, nmax + 0.49)))
    gave the two edge values a slightly smaller probability than interior
    values (interval length 0.99 vs 1.0); np.random.randint is exactly
    uniform over the closed range.
    """
    return int(np.random.randint(0, nmax + 1))


def get_SR_and_IDM_for_subset_of_instruments(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Sharpe ratio and diversification multiplier for a fixed instrument subset.

    :returns: (sharpe_ratio, idm) tuple
    """
    instrument_weights = get_instrument_weights_for_subset(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )
    sr = get_SR_for_subset_given_instrument_weights(
        instrument_weights=instrument_weights,
        all_curves=all_curves,
        subset_of_instruments=subset_of_instruments,
        last_ten_years=last_ten_years,
    )
    # dm_max set absurdly high so the raw (uncapped) IDM is visible
    idm = diversification_mult_single_period(
        corrmatrix=subset_corr_matrix, weights=instrument_weights, dm_max=99999
    )

    return sr, idm


def get_instrument_weights_for_subset(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    use_equal_weights=False,
) -> portfolioWeights:
    """Portfolio weights for a subset, either equal or correlation-optimised."""
    weights_as_list = get_instrument_weights_for_subset_as_list(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )

    return portfolioWeights.from_weights_and_keys(
        list_of_weights=weights_as_list, list_of_keys=subset_of_instruments
    )


def get_instrument_weights_for_subset_as_list(
    subset_of_instruments: list, subset_corr_matrix, use_equal_weights=True
) -> list:
    """
    Weights as a plain list, same ordering as subset_of_instruments.

    Falls back to equal weights for fewer than 3 instruments, where
    optimisation is pointless.
    """
    instrument_count = len(subset_of_instruments)
    if use_equal_weights or instrument_count < 3:
        return [1.0 / instrument_count] * instrument_count

    # floor negatives at zero then shrink towards the average correlation
    # to stabilise the optimisation
    subset_corr_floored = subset_corr_matrix.floor_correlation_matrix(0)
    subset_corr_shrunk = subset_corr_floored.shrink_to_average(0.5)

    ## equal vol can treat correlation as covariance
    weights = optimise_with_sigma(subset_corr_shrunk.values)

    return weights


from scipy.optimize import minimize


def optimise_with_sigma(sigma: np.array):
    """
    Minimum-variance long-only weights summing to 1, given covariance sigma.

    :param sigma: NxN covariance (here: shrunk correlation) matrix
    :returns: np.array of N weights
    """
    number_assets = sigma.shape[1]
    # Starting weights, equal weighting
    start_weights = [1.0 / number_assets] * number_assets
    # Set up constraints - positive weights, adding to 1.0
    bounds = [(0.0, 1.0)] * number_assets
    cdict = [{"type": "eq", "fun": addem}]
    ans = minimize(
        variance,
        start_weights,
        (sigma,),
        method="SLSQP",
        bounds=bounds,
        constraints=cdict,
        tol=0.001,
    )
    weights = ans["x"]

    return weights


def variance(weights, sigma):
    # returns the variance (NOT standard deviation) given weights and sigma
    return weights.dot(sigma.dot(weights.transpose()))


def addem(weights):
    # Used for constraints, weights must sum to 1
    return 1.0 - sum(weights)


def get_SR_for_subset_given_instrument_weights(
    instrument_weights: portfolioWeights,
    all_curves: dict,
    subset_of_instruments: list,
    last_ten_years=False,
):
    """
    Annualised Sharpe ratio of the weighted portfolio of return curves.

    :param instrument_weights: weights keyed by instrument code
    :param all_curves: dict of instrument_code -> percent returns series
    :param subset_of_instruments: codes to include
    :param last_ten_years: use only the final ~2500 rows (≈10 business years)
    :returns: float Sharpe ratio
    """
    df_of_curves = pd.DataFrame(
        {
            instrument_code: all_curves[instrument_code]
            for instrument_code in subset_of_instruments
        }
    )
    # broadcast the fixed weights over the full date index
    weights_as_df = pd.concat(
        [
            pd.Series(weight, index=df_of_curves.index)
            for weight in instrument_weights.values()
        ],
        axis=1,
    )
    weights_as_df.columns = list(instrument_weights.keys())
    weighted_returns = df_of_curves * weights_as_df
    portfolio_returns = weighted_returns.sum(axis=1)
    if last_ten_years:
        portfolio_returns = portfolio_returns[-2500:]
    portfolio_returns_mean = portfolio_returns.mean()
    portfolio_returns_stdev = portfolio_returns.std()

    # 16 ~= sqrt(256): annualise a daily Sharpe ratio (assumes daily returns)
    return 16 * portfolio_returns_mean / portfolio_returns_stdev


from syscore.interactive.progress_bar import progressBar

# portfolio sizes to test: every size 1..4, then steps of 5
range_to_do = list(range(1, 5)) + list(
    range(5, len(long_only_system.get_instrument_list()), 5)
)
max_monte_count = 1000


def effective_monte_count(n, max_monte_count):
    """Fewer Monte Carlo runs for larger n, scaling with 1/sqrt(n)."""
    return int(max_monte_count / (n**0.5))


def get_sr_and_idm_for_various_n(
    corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
):
    """
    Monte Carlo SR and IDM distributions for every portfolio size in
    range_to_do (module-level).

    :returns: (sharpe_ratio_results, idm_results) — dicts keyed by n, each
        value a list of per-run results
    """
    sharpe_ratio_results = dict()
    idm_results = dict()
    p = progressBar(len(range_to_do))
    for n in range_to_do:
        sr_this_n = []
        idm_this_n = []
        ## fewer runs for larger n, since each run is more expensive
        use_monte_count = effective_monte_count(n, max_monte_count)
        for __ in range(use_monte_count):
            sr, idm = get_SR_and_IDM_for_a_random_set_of_n(
                n=n,
                final_corr_matrix=corr_matrix,
                all_curves=all_curves,
                use_equal_weights=use_equal_weights,
                last_ten_years=last_ten_years,
            )
            sr_this_n.append(sr)
            idm_this_n.append(idm)
        sharpe_ratio_results[n] = sr_this_n
        idm_results[n] = idm_this_n
        p.iterate()
    p.close()

    return sharpe_ratio_results, idm_results


def plot_dict_as_df(
    dict_of_results, plot_points=True, divide_by_first_mean=False, square_results=False
):
    """
    Plot per-n result distributions (dots) plus the median line.

    NOTE(review): square_results is accepted but never used; kept only for
    interface compatibility.
    """
    results_df = return_df_padded_from_dict(dict_of_results)
    medians = results_df.median(axis=1)
    if divide_by_first_mean:
        # normalise everything by the median at the smallest n
        first_median = medians.values[0]
        results_df = results_df / first_median
        medians = medians / first_median
    if plot_points:
        results_df.plot(marker=".", linestyle="none", legend=False)
    medians.plot()


def return_df_padded_from_dict(dict_of_results: dict) -> pd.DataFrame:
    """
    DataFrame with one row per key; shorter result lists are NaN-padded so
    all rows have equal length.
    """
    pad_to = max(len(x) for x in dict_of_results.values())
    padded_dict = {
        key: pad_list(some_list, pad_to=pad_to)
        for key, some_list in dict_of_results.items()
    }
    results_df = pd.DataFrame(padded_dict)

    return results_df.transpose()


def pad_list(some_list, pad_to: int = 1000):
    """Right-pad some_list with NaN until it has length pad_to."""
    missing_len = pad_to - len(some_list)
    padding = [np.nan] * missing_len

    return some_list + padding


## now the EWMAC (slow trend) system
## FIX(review): the original comment here said "first long only" — a
## copy-paste leftover from the block above
ewmac_system = basic_db_futures_system(
    trading_rules=dict(
        ewmac=TradingRule(
            dict(
                function=ewmac,
                data=["data.daily_prices", "rawdata.daily_returns_volatility"],
                other_args=dict(Lfast=16, Lslow=64),
            )
        ),
    ),
)
# forecast scalars
ewmac_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
ewmac_system.config.base_currency = "USD"
ewmac_system.config.forecast_scalars = dict(ewmac=3.75)
ewmac_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
ewmac_all_curves = {
    instrument_code: ewmac_system.accounts.pandl_for_subsystem(
        instrument_code
    ).percent
    for instrument_code in ewmac_system.get_instrument_list()
}

## and the correlation matrix
stacked_curves = pd.concat(ewmac_all_curves, axis=1)
stacked_curves.columns = ewmac_system.get_instrument_list()
ewmac_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation is 0.0813:
ewmac_corr_matrix.average_corr()

## FIX(review): the original passed last_ten_years=last_ten_years to the
## first call below, but that name was never defined anywhere in the file
## (NameError at runtime); define it here with the functions' default value
last_ten_years = False

long_sr, long_idm = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=False,
    last_ten_years=last_ten_years,
)

long_sr_ew, long_idm_ew = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=True,
)

ewmac_sr, ewmac_idm = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=False,
)

ewmac_sr_ew, ewmac_idm_ew = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=True,
)

## this sort of thing
plot_dict_as_df(ewmac_sr, divide_by_first_mean=True, plot_points=False)
to join this conversation on GitHub. Already have an account? Sign in to comment