 ### first let's build two strats, one long only, one EWMAC
import matplotlib

matplotlib.use("TkAgg")
matplotlib.rcParams.update({"font.size": 22})

from copy import copy
from random import randint, uniform

import numpy as np
import pandas as pd
from scipy.optimize import minimize

from syscore.interactive.progress_bar import progressBar
from sysquant.estimators.correlations import (
    correlationEstimate,
    create_boring_corr_matrix,
)
from sysquant.estimators.diversification_multipliers import (
    diversification_mult_single_period,
)
from sysquant.optimisation.weights import portfolioWeights
from systems.provided.basic.system import basic_db_futures_system
from systems.provided.rules.ewmac import ewmac
from systems.trading_rules import TradingRule
from private.projects.futures_book.generate_instrument_list import (
    MASTER_INSTRUMENT_LIST,
)

## simple examples: average correlation and independent bets for a toy matrix
corr = correlationEstimate(
    np.array([[1, 0.5, 0.3], [0.5, 1, 0.1], [0.3, 0.1, 1]]), columns=["a", "b", "c"]
)
corr.average_corr()
weights = portfolioWeights(dict(a=0.45, b=0.45, c=0.1))

## independent bets
## NOTE(review): the helper below squares the multiplier, this line cubes it;
## the result is discarded either way - confirm which power was intended
diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 3


def get_independent_bets_for_length_n_and_avg_corr(n: int, avg_corr: float):
    """
    Squared diversification multiplier for n equally weighted assets that all
    share the same off-diagonal correlation.

    :param n: number of assets
    :param avg_corr: correlation used for every off-diagonal entry
    :returns: diversification multiplier ** 2
    """
    corr = create_boring_corr_matrix(size=n, offdiag=avg_corr, columns=list(range(n)))
    weights = portfolioWeights.from_weights_and_keys(
        list_of_weights=[1.0 / n] * n, list_of_keys=corr.list_of_keys()
    )

    return diversification_mult_single_period(corrmatrix=corr, weights=weights) ** 2


def long_bias(price):
    """
    Constant long forecast: a copy of `price` with every value set to 10.0

    :param price: The price or other series to use (assumed Tx1)
    :type price: pd.DataFrame

    :returns: pd.Series -- unscaled, uncapped forecast
    """
    forecast = 10.0
    forecast_ts = copy(price)
    forecast_ts[:] = forecast

    return forecast_ts


## first long only, actually very slow trend
long_only_system = basic_db_futures_system(
    trading_rules=dict(
        long_only=TradingRule(dict(function=long_bias, data=["data.daily_prices"])),
    ),
)
long_only_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
long_only_system.config.base_currency = "USD"
long_only_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
all_curves = {
    instrument_code: long_only_system.accounts.pandl_for_subsystem(
        instrument_code
    ).percent
    for instrument_code in long_only_system.get_instrument_list()
}

## and the correlation matrix for subsystems
stacked_curves = pd.concat(all_curves, axis=1)
stacked_curves.columns = long_only_system.get_instrument_list()
long_only_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation is 0.0904:
long_only_corr_matrix.average_corr()

## for instrument returns is 0.224
long_only_system.portfolio.get_correlation_matrix().average_corr()


def get_SR_and_IDM_for_a_random_set_of_n(
    n: int,
    final_corr_matrix,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Draw n instruments at random (without replacement) from the columns of
    `final_corr_matrix` and return (sharpe ratio, IDM) for that portfolio.

    :param n: number of instruments to draw
    :param final_corr_matrix: correlation matrix covering every candidate
    :param all_curves: dict of instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: only use the last 2500 rows of portfolio returns
    :returns: tuple (sr, idm)
    """
    ## pick some instruments
    instrument_list = list(final_corr_matrix.columns)
    subset_of_instruments = pick_a_subset_of_instruments(n, instrument_list)
    subset_corr_matrix = final_corr_matrix.subset(subset_of_instruments)

    sr, idm = get_SR_and_IDM_for_subset_of_instruments(
        subset_of_instruments,
        subset_corr_matrix=subset_corr_matrix,
        all_curves=all_curves,
        use_equal_weights=use_equal_weights,
        last_ten_years=last_ten_years,
    )

    return sr, idm


def pick_a_subset_of_instruments(n, instrument_list: list) -> list:
    """
    Choose n distinct entries of `instrument_list` at random, in draw order.
    The list passed in is not modified (a shallow copy is consumed instead).
    """
    remaining_instrument_list = copy(instrument_list)
    chosen_instruments = []
    for __ in range(n):
        next_instrument = pick_a_random_instrument(remaining_instrument_list)
        remaining_instrument_list.remove(next_instrument)
        chosen_instruments.append(next_instrument)

    return chosen_instruments


def pick_a_random_instrument(remaining_instrument_list) -> str:
    """Return one randomly chosen element of `remaining_instrument_list`."""
    length = len(remaining_instrument_list)
    idx = random_int_between_0_and_nmax(length - 1)

    return remaining_instrument_list[idx]


def random_int_between_0_and_nmax(nmax) -> int:
    """
    Uniform random integer in the closed interval [0, nmax].

    Uses randint rather than rounding uniform(-0.49, nmax + 0.49): with the
    rounding approach the two end values get a 0.99 wide bin versus 1.0 for
    interior values, so they are drawn slightly less often.
    """
    return randint(0, nmax)


def get_SR_and_IDM_for_subset_of_instruments(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
) -> tuple:
    """
    Sharpe ratio and diversification multiplier for a given set of instruments.

    :param subset_of_instruments: instrument codes in the portfolio
    :param subset_corr_matrix: correlation matrix for exactly those instruments
    :param all_curves: dict of instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: only use the last 2500 rows of portfolio returns
    :returns: tuple (sr, idm)
    """
    instrument_weights = get_instrument_weights_for_subset(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )
    sr = get_SR_for_subset_given_instrument_weights(
        instrument_weights=instrument_weights,
        all_curves=all_curves,
        subset_of_instruments=subset_of_instruments,
        last_ten_years=last_ten_years,
    )
    ## dm_max is set very high so the multiplier is effectively uncapped
    idm = diversification_mult_single_period(
        corrmatrix=subset_corr_matrix, weights=instrument_weights, dm_max=99999
    )

    return sr, idm


def get_instrument_weights_for_subset(
    subset_of_instruments: list,
    subset_corr_matrix: correlationEstimate,
    use_equal_weights=False,
) -> portfolioWeights:
    """
    Instrument weights for the subset, wrapped as a portfolioWeights object.

    Note the default for use_equal_weights here is False, unlike the other
    functions in this file; every caller in this file passes it explicitly.
    """
    weights_as_list = get_instrument_weights_for_subset_as_list(
        subset_of_instruments, subset_corr_matrix, use_equal_weights=use_equal_weights
    )

    return portfolioWeights.from_weights_and_keys(
        list_of_weights=weights_as_list, list_of_keys=subset_of_instruments
    )


def get_instrument_weights_for_subset_as_list(
    subset_of_instruments: list, subset_corr_matrix, use_equal_weights=True
) -> list:
    """
    Weights in the same order as `subset_of_instruments`.

    Equal weights are returned when requested, or when there are fewer than
    three instruments. Otherwise the correlation matrix is floored at zero,
    shrunk towards its average with factor 0.5, and minimum variance weights
    are found by optimisation.
    """
    instrument_count = len(subset_of_instruments)
    if use_equal_weights or instrument_count < 3:
        return [1.0 / instrument_count] * instrument_count

    subset_corr_floored = subset_corr_matrix.floor_correlation_matrix(0)
    subset_corr_shrunk = subset_corr_floored.shrink_to_average(0.5)

    ## equal vol can treat correlation as covariance
    weights = optimise_with_sigma(subset_corr_shrunk.values)

    return weights


def optimise_with_sigma(sigma: np.ndarray):
    """
    Minimum variance weights for covariance matrix `sigma`, long only and
    summing to one, found with SLSQP starting from equal weights.

    :param sigma: square covariance (or correlation) matrix
    :returns: array of weights, one per column of sigma
    """
    number_assets = sigma.shape[1]

    # Starting weights, equal weighting
    start_weights = [1.0 / number_assets] * number_assets

    # Set up constraints - positive weights, adding to 1.0
    bounds = [(0.0, 1.0)] * number_assets
    cdict = [{"type": "eq", "fun": addem}]

    ans = minimize(
        variance,
        start_weights,
        (sigma,),
        method="SLSQP",
        bounds=bounds,
        constraints=cdict,
        tol=0.001,
    )
    weights = ans["x"]

    return weights


def variance(weights, sigma):
    # returns the variance (NOT standard deviation) given weights and sigma
    return weights.dot(sigma.dot(weights.transpose()))


def addem(weights):
    # Used for constraints, weights must sum to 1
    return 1.0 - sum(weights)


def get_SR_for_subset_given_instrument_weights(
    instrument_weights: portfolioWeights,
    all_curves: dict,
    subset_of_instruments: list,
    last_ten_years=False,
):
    """
    Sharpe ratio of the weighted sum of the chosen instruments' returns.

    :param instrument_weights: weight per instrument code
    :param all_curves: dict of instrument code -> returns series
    :param subset_of_instruments: instrument codes to include
    :param last_ten_years: if True only the last 2500 rows are used
    :returns: 16 * mean / standard deviation of the portfolio returns
    """
    df_of_curves = pd.DataFrame(
        {
            instrument_code: all_curves[instrument_code]
            for instrument_code in subset_of_instruments
        }
    )

    ## one constant column per weight, sharing the returns index, so the
    ## multiplication below lines up by date and by instrument code
    weights_as_df = pd.concat(
        [
            pd.Series(weight, index=df_of_curves.index)
            for weight in instrument_weights.values()
        ],
        axis=1,
    )
    weights_as_df.columns = list(instrument_weights.keys())

    weighted_returns = df_of_curves * weights_as_df
    ## sum skips NaN, so an instrument with no data on a date adds nothing
    portfolio_returns = weighted_returns.sum(axis=1)

    if last_ten_years:
        portfolio_returns = portfolio_returns[-2500:]

    portfolio_returns_mean = portfolio_returns.mean()
    portfolio_returns_stdev = portfolio_returns.std()

    ## NOTE(review): 16 is presumably sqrt(256) to annualise daily returns
    return 16 * portfolio_returns_mean / portfolio_returns_stdev


## portfolio sizes to test: 1 to 4, then every fifth size
range_to_do = list(range(1, 5)) + list(
    range(5, len(long_only_system.get_instrument_list()), 5)
)
max_monte_count = 1000


def effective_monte_count(n, max_monte_count):
    """Number of Monte Carlo runs for portfolio size n: max / sqrt(n)."""
    return int(max_monte_count / (n**0.5))


def get_sr_and_idm_for_various_n(
    corr_matrix: correlationEstimate,
    all_curves: dict,
    use_equal_weights=True,
    last_ten_years=False,
):
    """
    Monte Carlo over random portfolios for each size in the module level
    `range_to_do`, using `max_monte_count` to set the number of runs.

    :param corr_matrix: correlation matrix covering every candidate
    :param all_curves: dict of instrument code -> returns series
    :param use_equal_weights: equal weights if True, otherwise optimised
    :param last_ten_years: only use the last 2500 rows of portfolio returns
    :returns: tuple of two dicts keyed by n: list of sharpe ratios, list of IDM
    """
    sharpe_ratio_results = dict()
    idm_results = dict()

    p = progressBar(len(range_to_do))
    for n in range_to_do:
        sr_this_n = []
        idm_this_n = []

        ## do max runs to begin with, then fewer as n grows
        use_monte_count = effective_monte_count(n, max_monte_count)
        for __ in range(use_monte_count):
            sr, idm = get_SR_and_IDM_for_a_random_set_of_n(
                n=n,
                final_corr_matrix=corr_matrix,
                all_curves=all_curves,
                use_equal_weights=use_equal_weights,
                last_ten_years=last_ten_years,
            )
            sr_this_n.append(sr)
            idm_this_n.append(idm)

        sharpe_ratio_results[n] = sr_this_n
        idm_results[n] = idm_this_n
        p.iterate()

    p.close()

    return sharpe_ratio_results, idm_results


def plot_dict_as_df(
    dict_of_results, plot_points=True, divide_by_first_mean=False, square_results=False
):
    """
    Plot the median result for each key, optionally with every individual
    result shown as a point.

    :param dict_of_results: dict of key -> list of results
    :param plot_points: also plot each individual result as a marker
    :param divide_by_first_mean: normalise everything by the first key's
        median (despite the name, the median is used, not the mean)
    :param square_results: currently unused
    """
    results_df = return_df_padded_from_dict(dict_of_results)
    medians = results_df.median(axis=1)

    if divide_by_first_mean:
        first_median = medians.values[0]
        results_df = results_df / first_median
        medians = medians / first_median

    if plot_points:
        results_df.plot(marker=".", linestyle="none", legend=False)

    medians.plot()


def return_df_padded_from_dict(dict_of_results: dict) -> pd.DataFrame:
    """
    DataFrame with one row per key of `dict_of_results`; shorter lists are
    padded with NaN to the length of the longest.
    """
    pad_to = np.max([len(x) for x in dict_of_results.values()])
    padded_dict = {
        key: pad_list(some_list, pad_to=pad_to)
        for key, some_list in dict_of_results.items()
    }
    results_df = pd.DataFrame(padded_dict)

    return results_df.transpose()


def pad_list(some_list, pad_to: int = 1000):
    """Return a new list: `some_list` followed by NaN up to length `pad_to`."""
    missing_len = pad_to - len(some_list)
    padding = [np.nan] * missing_len

    return some_list + padding


## now the trend following system: a single EWMAC 16,64 rule
ewmac_system = basic_db_futures_system(
    trading_rules=dict(
        ewmac=TradingRule(
            dict(
                function=ewmac,
                data=["data.daily_prices", "rawdata.daily_returns_volatility"],
                other_args=dict(Lfast=16, Lslow=64),
            )
        ),
    ),
)
ewmac_system.get_instrument_list(force_to_passed_list=MASTER_INSTRUMENT_LIST)
ewmac_system.config.base_currency = "USD"
# forecast scalars
ewmac_system.config.forecast_scalars = dict(ewmac=3.75)
ewmac_system.config.notional_trading_capital = 100e6

## Want to store account curves for each subsystem
ewmac_all_curves = {
    instrument_code: ewmac_system.accounts.pandl_for_subsystem(instrument_code).percent
    for instrument_code in ewmac_system.get_instrument_list()
}

## and the correlation matrix
stacked_curves = pd.concat(ewmac_all_curves, axis=1)
stacked_curves.columns = ewmac_system.get_instrument_list()
ewmac_corr_matrix = correlationEstimate.from_pd(stacked_curves.corr())

## average correlation is 0.0813:
ewmac_corr_matrix.average_corr()

## set to True to measure Sharpe ratios over the last 2500 rows only
last_ten_years = False

long_sr, long_idm = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=False,
    last_ten_years=last_ten_years,
)
long_sr_ew, long_idm_ew = get_sr_and_idm_for_various_n(
    long_only_corr_matrix,
    all_curves=all_curves,
    use_equal_weights=True,
    last_ten_years=last_ten_years,
)
ewmac_sr, ewmac_idm = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=False,
    last_ten_years=last_ten_years,
)
ewmac_sr_ew, ewmac_idm_ew = get_sr_and_idm_for_various_n(
    ewmac_corr_matrix,
    all_curves=ewmac_all_curves,
    use_equal_weights=True,
    last_ten_years=last_ten_years,
)

## this sort of thing
plot_dict_as_df(ewmac_sr, divide_by_first_mean=True, plot_points=False)
