# rolandkofler/macds_beat_bnh.py

## macds_beat_bnh.py
import pandas as pd
import logging

# Send log records to both the protocol file and the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('protocol.log'),
        logging.StreamHandler(),
    ],
)

# Read the in-sample price history, indexed by its 'date' column
in_sample_data = pd.read_csv('in_sample_data.csv', parse_dates=True, index_col='date')
logging.info(in_sample_data.head())

# Starting capital shared by the MACD simulation and the buy-and-hold benchmark
INITIAL_BALANCE = 10000


def calculate_macd(data, short_period=38, long_period=43, signal_period=3):
    """Add 'MACD' and 'Signal_Line' columns to ``data`` in place.

    The MACD line is the short-span EMA of the 'price' column minus its
    long-span EMA; the signal line is an EMA of the MACD line. All EMAs use
    the recursive form (``adjust=False``).

    Args:
        data: DataFrame with a 'price' column; it is modified in place.
        short_period: Span of the fast EMA.
        long_period: Span of the slow EMA.
        signal_period: Span of the EMA applied to the MACD line.

    Returns:
        None. The results are written to ``data``.
    """
    def smooth(series, span):
        # Recursive exponential moving average with the given span
        return series.ewm(span=span, adjust=False).mean()

    prices = data['price']
    macd_line = smooth(prices, short_period) - smooth(prices, long_period)
    data['MACD'] = macd_line
    data['Signal_Line'] = smooth(macd_line, signal_period)


def simulate_trading_with_fee(data, initial_balance=INITIAL_BALANCE, trading_fee_percent=0.26):
    """Back-test an all-in / all-out MACD crossover strategy with trading fees.

    Rows are visited in order, starting at the second row. While no position
    is held, a row whose MACD is above the signal line converts the whole
    balance into a position at that row's price. While a position is held, a
    row whose MACD is below the signal line sells all of it. The fee is
    charged on both sides. A position still open after the last row is valued
    at the last price, net of the sell fee.

    Args:
        data: DataFrame with 'price', 'MACD' and 'Signal_Line' columns.
        initial_balance: Starting cash balance.
        trading_fee_percent: Fee per trade in percent (0.26 means 0.26 %).

    Returns:
        tuple: ``(final_balance, max_drawdown)`` where ``max_drawdown`` is a
        fraction of the peak balance.

    Note:
        The drawdown is evaluated only on realised balances (after each sell
        and on the final liquidation). Price moves while a position is open
        are not reflected in it.
    """
    fee_fraction = trading_fee_percent / 100
    buy_cost_factor = 1 + fee_fraction
    sell_net_factor = 1 - fee_fraction

    # Extract each column once so the loop does not repeat a column lookup
    # and an .iloc call for every row.
    macd_values = data['MACD'].to_numpy()
    signal_values = data['Signal_Line'].to_numpy()
    prices = data['price'].to_numpy()

    def track_drawdown(realized, peak, worst):
        """Return the updated (peak, worst drawdown) for a realised balance."""
        if realized > peak:
            return realized, worst
        return peak, max(worst, (peak - realized) / peak)

    balance = initial_balance
    peak_balance = initial_balance  # Highest realised balance so far
    max_drawdown = 0  # Largest fractional drop from the peak
    position = 0  # Amount of the asset currently held

    # Row 0 is never traded on.
    for macd, signal, price in zip(macd_values[1:], signal_values[1:], prices[1:]):
        if macd > signal and position == 0:  # Buy signal
            # The fee is paid out of the balance, so it buys slightly less
            position = balance / (price * buy_cost_factor)
            balance = 0
        elif macd < signal and position > 0:  # Sell signal
            balance = position * price * sell_net_factor
            position = 0
            peak_balance, max_drawdown = track_drawdown(balance, peak_balance, max_drawdown)

    # Liquidate whatever is still held at the last available price
    if position > 0:
        balance = position * prices[-1] * sell_net_factor
        peak_balance, max_drawdown = track_drawdown(balance, peak_balance, max_drawdown)

    return balance, max_drawdown

# Calculate maximum drawdown
def calculate_max_drawdown(data):
    """Return the deepest peak-to-trough decline of 'price' as a positive fraction.

    Args:
        data: DataFrame with a 'price' column.

    Returns:
        The largest relative drop below the running maximum price, sign
        flipped so that it is non-negative (the form used for the Calmar
        ratio).
    """
    prices = data['price']
    # Relative distance of every price below the highest price seen so far
    drawdown_curve = prices / prices.cummax() - 1.0
    deepest = drawdown_curve.cummin().iloc[-1]
    assert deepest <= 0, 'Maximum drawdown should be negative'
    return -deepest

def simulate_trading_with_params(data, short_period, long_period, signal_period, initial_balance=10000):
    """Run one MACD back-test and summarise it as a row of metrics.

    Args:
        data (pd.DataFrame): Market data with a 'price' column. MACD columns
            are added to it in place.
        short_period (int): The short period for the MACD calculation.
        long_period (int): The long period for the MACD calculation.
        signal_period (int): The signal period for the MACD calculation.
        initial_balance (float): The initial trading balance.

    Returns:
        dict: The three input periods plus 'Max Drawdown', 'Annualized
        Performance' and 'Calmar Ratio'. When the drawdown is zero the Calmar
        ratio is ``inf`` for a positive return and ``nan`` otherwise.
    """
    calculate_macd(data, short_period, long_period, signal_period)
    # Forward the starting capital so the simulation and the return
    # calculation below use the same base.
    final_balance, max_drawdown = simulate_trading_with_fee(data, initial_balance)
    returns = final_balance - initial_balance
    performance = returns / initial_balance
    # Simple (non-compounded) scaling of the total return to 365 rows
    annualized_performance = performance / len(data) * 365

    # The Calmar ratio is undefined without a drawdown; avoid dividing by zero
    if max_drawdown > 0:
        calmar_ratio = annualized_performance / max_drawdown
    elif annualized_performance > 0:
        calmar_ratio = float('inf')
    else:
        calmar_ratio = float('nan')

    return {
        'Short Period': short_period,
        'Long Period': long_period,
        'Signal Period': signal_period,
        'Max Drawdown': max_drawdown,
        'Annualized Performance': annualized_performance,
        'Calmar Ratio': calmar_ratio,
    }

def calculate_buy_and_hold_stats(btc_data):
    """Compute benchmark metrics for buying at the first price and holding.

    Args:
        btc_data: DataFrame with a 'price' column.

    Returns:
        tuple: ``(performance, max_drawdown, annualized_performance,
        calmar_ratio)``, with performance values expressed as fractions of
        ``INITIAL_BALANCE``.
    """
    prices = btc_data['price']
    final_value = (prices.iloc[-1] / prices.iloc[0]) * INITIAL_BALANCE
    total_return = (final_value - INITIAL_BALANCE) / INITIAL_BALANCE
    worst_drawdown = calculate_max_drawdown(btc_data)
    # Simple (non-compounded) scaling of the total return to 365 rows
    yearly_return = total_return / len(btc_data) * 365
    calmar = yearly_return / worst_drawdown
    return total_return, worst_drawdown, yearly_return, calmar


# Benchmark: buy at the first price and hold until the last one
(
    buy_and_hold_performance,
    buy_and_hold_max_drawdown,
    buy_and_hold_annualized_performance,
    buy_and_hold_calmar_ratio,
) = calculate_buy_and_hold_stats(in_sample_data)

logging.info("=== The Buy and Hold Problem under Scrutinity ===")
logging.info(f"Buy and Hold Performance: {buy_and_hold_performance * 100:.2f}%")
logging.info(f"Annualized Buy and Hold Performance: {buy_and_hold_annualized_performance * 100:.2f}%")


# Parameter sweep: evaluate many MACD parameter combinations in parallel
from joblib import Parallel, delayed

# Define parameter ranges
short_periods = range(10, 60)  # Example range
long_periods = range(40, 200)  # Example range
signal_periods = range(2, 10)  # Example range

# Only the parameter triples are stored; the short period must be below the long one
tasks = [(sp, lp, sig) for sp in short_periods for lp in long_periods for sig in signal_periods if sp < lp]

# Execute in parallel and measure the time it takes
import time

start = time.perf_counter()
# Each task receives its own copy of the data (calculate_macd writes columns
# into it). The copy is made while the generator is consumed instead of
# holding one DataFrame copy per combination in a list.
results = Parallel(n_jobs=-1)(
    delayed(simulate_trading_with_params)(in_sample_data.copy(), sp, lp, sig)
    for sp, lp, sig in tasks
)
stop = time.perf_counter()
logging.info(f"Parallel execution took {stop - start:.2f} seconds")

# Convert results to DataFrame for easier analysis
results_df = pd.DataFrame(results)


# Set the option to display all columns (None means no limit)
pd.set_option('display.max_columns', None)
# Set the display width to None to make pandas attempt to use maximum width
pd.set_option('display.width', None)

sorted_by_apr_results = results_df.copy()
# Add the ratio of annualized performance to the buy and hold annualized performance
sorted_by_apr_results['Annualized Performance Ratio'] = results_df['Annualized Performance'] / buy_and_hold_annualized_performance
sorted_by_apr_results = sorted_by_apr_results.sort_values(by='Annualized Performance Ratio', ascending=False)

# Share (in percent) of parameter combinations that beat buy and hold.
# The returns are compared directly: testing "ratio > 1" flips its meaning
# when the benchmark return is negative and is undefined when it is zero.
# NOTE: the value is a percentage of combinations, despite the "percentile"
# wording of the log message.
better_than_buy_and_hold = (results_df['Annualized Performance'] > buy_and_hold_annualized_performance).mean() * 100
logging.info(f"MACD strategies have a better than buy and hold annualized performance in the {better_than_buy_and_hold:.2f}th percentile.")
logging.info("it is not supported by the Efficient Market Hypothesis (EMH) that the MACD strategy outperforms the buy and hold strategy so consistently.")
	import pandas as pd
	import logging

	# Send log records to both the protocol file and the console
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    handlers=[
	        logging.FileHandler('protocol.log'),
	        logging.StreamHandler(),
	    ],
	)

	# Read the in-sample price history, indexed by its 'date' column
	in_sample_data = pd.read_csv('in_sample_data.csv', parse_dates=True, index_col='date')
	logging.info(in_sample_data.head())

	# Starting capital shared by the MACD simulation and the buy-and-hold benchmark
	INITIAL_BALANCE = 10000


	def calculate_macd(data, short_period=38, long_period=43, signal_period=3):
	    """Add 'MACD' and 'Signal_Line' columns to ``data`` in place.

	    The MACD line is the short-span EMA of the 'price' column minus its
	    long-span EMA; the signal line is an EMA of the MACD line. All EMAs
	    use the recursive form (``adjust=False``).

	    Args:
	        data: DataFrame with a 'price' column; it is modified in place.
	        short_period: Span of the fast EMA.
	        long_period: Span of the slow EMA.
	        signal_period: Span of the EMA applied to the MACD line.

	    Returns:
	        None. The results are written to ``data``.
	    """
	    short_ema = data['price'].ewm(span=short_period, adjust=False).mean()
	    long_ema = data['price'].ewm(span=long_period, adjust=False).mean()
	    data['MACD'] = short_ema - long_ema
	    data['Signal_Line'] = data['MACD'].ewm(span=signal_period, adjust=False).mean()


	def simulate_trading_with_fee(data, initial_balance=INITIAL_BALANCE, trading_fee_percent=0.26):
	    """Back-test an all-in / all-out MACD crossover strategy with trading fees.

	    Rows are visited in order, starting at the second row. While no
	    position is held, a row whose MACD is above the signal line converts
	    the whole balance into a position at that row's price. While a
	    position is held, a row whose MACD is below the signal line sells all
	    of it. The fee is charged on both sides. A position still open after
	    the last row is valued at the last price, net of the sell fee.

	    Args:
	        data: DataFrame with 'price', 'MACD' and 'Signal_Line' columns.
	        initial_balance: Starting cash balance.
	        trading_fee_percent: Fee per trade in percent (0.26 means 0.26 %).

	    Returns:
	        tuple: ``(final_balance, max_drawdown)`` where ``max_drawdown`` is
	        a fraction of the peak balance.

	    Note:
	        The drawdown is evaluated only on realised balances (after each
	        sell and on the final liquidation). Price moves while a position
	        is open are not reflected in it.
	    """
	    fee_fraction = trading_fee_percent / 100
	    buy_cost_factor = 1 + fee_fraction
	    sell_net_factor = 1 - fee_fraction

	    # Extract each column once so the loop does not repeat a column lookup
	    # and an .iloc call for every row.
	    macd_values = data['MACD'].to_numpy()
	    signal_values = data['Signal_Line'].to_numpy()
	    prices = data['price'].to_numpy()

	    def track_drawdown(realized, peak, worst):
	        """Return the updated (peak, worst drawdown) for a realised balance."""
	        if realized > peak:
	            return realized, worst
	        return peak, max(worst, (peak - realized) / peak)

	    balance = initial_balance
	    peak_balance = initial_balance  # Highest realised balance so far
	    max_drawdown = 0  # Largest fractional drop from the peak
	    position = 0  # Amount of the asset currently held

	    # Row 0 is never traded on.
	    for macd, signal, price in zip(macd_values[1:], signal_values[1:], prices[1:]):
	        if macd > signal and position == 0:  # Buy signal
	            # The fee is paid out of the balance, so it buys slightly less
	            position = balance / (price * buy_cost_factor)
	            balance = 0
	        elif macd < signal and position > 0:  # Sell signal
	            balance = position * price * sell_net_factor
	            position = 0
	            peak_balance, max_drawdown = track_drawdown(balance, peak_balance, max_drawdown)

	    # Liquidate whatever is still held at the last available price
	    if position > 0:
	        balance = position * prices[-1] * sell_net_factor
	        peak_balance, max_drawdown = track_drawdown(balance, peak_balance, max_drawdown)

	    return balance, max_drawdown

	# Calculate maximum drawdown
	def calculate_max_drawdown(data):
	    """Return the deepest peak-to-trough decline of 'price' as a positive fraction.

	    Args:
	        data: DataFrame with a 'price' column.

	    Returns:
	        The largest relative drop below the running maximum price, sign
	        flipped so that it is non-negative (the form used for the Calmar
	        ratio).
	    """
	    rolling_max = data['price'].cummax()
	    # Relative distance of every price below the highest price seen so far
	    daily_drawdown = data['price'] / rolling_max - 1.0
	    max_drawdown = daily_drawdown.cummin().iloc[-1]
	    assert max_drawdown <= 0, 'Maximum drawdown should be negative'
	    return max_drawdown * -1  # Return as positive number as expected by calmar ratio

	def simulate_trading_with_params(data, short_period, long_period, signal_period, initial_balance=10000):
	    """Run one MACD back-test and summarise it as a row of metrics.

	    Args:
	        data (pd.DataFrame): Market data with a 'price' column. MACD
	            columns are added to it in place.
	        short_period (int): The short period for the MACD calculation.
	        long_period (int): The long period for the MACD calculation.
	        signal_period (int): The signal period for the MACD calculation.
	        initial_balance (float): The initial trading balance.

	    Returns:
	        dict: The three input periods plus 'Max Drawdown', 'Annualized
	        Performance' and 'Calmar Ratio'. When the drawdown is zero the
	        Calmar ratio is ``inf`` for a positive return and ``nan``
	        otherwise.
	    """
	    calculate_macd(data, short_period, long_period, signal_period)
	    # Forward the starting capital so the simulation and the return
	    # calculation below use the same base.
	    final_balance, max_drawdown = simulate_trading_with_fee(data, initial_balance)
	    returns = final_balance - initial_balance
	    performance = returns / initial_balance
	    # Simple (non-compounded) scaling of the total return to 365 rows
	    annualized_performance = performance / len(data) * 365

	    # The Calmar ratio is undefined without a drawdown; avoid dividing by zero
	    if max_drawdown > 0:
	        calmar_ratio = annualized_performance / max_drawdown
	    elif annualized_performance > 0:
	        calmar_ratio = float('inf')
	    else:
	        calmar_ratio = float('nan')

	    return {
	        'Short Period': short_period,
	        'Long Period': long_period,
	        'Signal Period': signal_period,
	        'Max Drawdown': max_drawdown,
	        'Annualized Performance': annualized_performance,
	        'Calmar Ratio': calmar_ratio,
	    }

	def calculate_buy_and_hold_stats(btc_data):
	    """Compute benchmark metrics for buying at the first price and holding.

	    Args:
	        btc_data: DataFrame with a 'price' column.

	    Returns:
	        tuple: ``(performance, max_drawdown, annualized_performance,
	        calmar_ratio)``, with performance values expressed as fractions
	        of ``INITIAL_BALANCE``.
	    """
	    buy_and_hold_final = (btc_data['price'].iloc[-1] / btc_data['price'].iloc[0]) * INITIAL_BALANCE
	    buy_and_hold_performance = ((buy_and_hold_final - INITIAL_BALANCE) / INITIAL_BALANCE)
	    buy_and_hold_max_drawdown = calculate_max_drawdown(btc_data)
	    # Simple (non-compounded) scaling of the total return to 365 rows
	    buy_and_hold_annualized_performance = buy_and_hold_performance / len(btc_data) * 365
	    buy_and_hold_calmar_ratio = buy_and_hold_annualized_performance / buy_and_hold_max_drawdown
	    return buy_and_hold_performance, buy_and_hold_max_drawdown, buy_and_hold_annualized_performance, buy_and_hold_calmar_ratio




	# Benchmark: buy at the first price and hold until the last one
	(
	    buy_and_hold_performance,
	    buy_and_hold_max_drawdown,
	    buy_and_hold_annualized_performance,
	    buy_and_hold_calmar_ratio,
	) = calculate_buy_and_hold_stats(in_sample_data)

	logging.info("=== The Buy and Hold Problem under Scrutinity ===")
	logging.info(f"Buy and Hold Performance: {buy_and_hold_performance * 100:.2f}%")
	logging.info(f"Annualized Buy and Hold Performance: {buy_and_hold_annualized_performance * 100:.2f}%")


	# Parameter sweep: evaluate many MACD parameter combinations in parallel
	from joblib import Parallel, delayed

	# Define parameter ranges
	short_periods = range(10, 60)  # Example range
	long_periods = range(40, 200)  # Example range
	signal_periods = range(2, 10)  # Example range

	# Only the parameter triples are stored; the short period must be below the long one
	tasks = [(sp, lp, sig) for sp in short_periods for lp in long_periods for sig in signal_periods if sp < lp]

	# Execute in parallel and measure the time it takes
	import time

	start = time.perf_counter()
	# Each task receives its own copy of the data (calculate_macd writes columns
	# into it). The copy is made while the generator is consumed instead of
	# holding one DataFrame copy per combination in a list.
	results = Parallel(n_jobs=-1)(
	    delayed(simulate_trading_with_params)(in_sample_data.copy(), sp, lp, sig)
	    for sp, lp, sig in tasks
	)
	stop = time.perf_counter()
	logging.info(f"Parallel execution took {stop - start:.2f} seconds")

	# Convert results to DataFrame for easier analysis
	results_df = pd.DataFrame(results)


	# Set the option to display all columns (None means no limit)
	pd.set_option('display.max_columns', None)
	# Set the display width to None to make pandas attempt to use maximum width
	pd.set_option('display.width', None)

	sorted_by_apr_results = results_df.copy()
	# Add the ratio of annualized performance to the buy and hold annualized performance
	sorted_by_apr_results['Annualized Performance Ratio'] = results_df['Annualized Performance'] / buy_and_hold_annualized_performance
	sorted_by_apr_results = sorted_by_apr_results.sort_values(by='Annualized Performance Ratio', ascending=False)

	# Share (in percent) of parameter combinations that beat buy and hold.
	# The returns are compared directly: testing "ratio > 1" flips its meaning
	# when the benchmark return is negative and is undefined when it is zero.
	# NOTE: the value is a percentage of combinations, despite the "percentile"
	# wording of the log message.
	better_than_buy_and_hold = (results_df['Annualized Performance'] > buy_and_hold_annualized_performance).mean() * 100
	logging.info(f"MACD strategies have a better than buy and hold annualized performance in the {better_than_buy_and_hold:.2f}th percentile.")
	logging.info("it is not supported by the Efficient Market Hypothesis (EMH) that the MACD strategy outperforms the buy and hold strategy so consistently.")