# databento-bot/markouts.py

## markouts.py
import databento as db
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Session under study: midnight US/Eastern on 2023-06-22. Used as the start of
# the data request and shown in the plot title.
DATE = pd.Timestamp(
    year=2023,
    month=6,
    day=22,
    tz="US/Eastern",
)
# Number of geometrically spaced offsets sampled on each side of an event.
NUM_TIME_SAMPLES = 1_000
# Ticker requested from the dataset and shown in the plot title.
SYMBOL = "NVDA"
# Largest offset before/after an event, in microseconds (120 seconds).
WINDOW_LIMITS_US = 120 * 1_000_000.0


# Compute markouts
def markout(mbp_data, events, offsets, aggressor=False):
    """Compute the mean signed markout curve of ``events`` against the midprice.

    For every event, the midprice in effect at ``ts_event + offset`` (the last
    quote at or before that instant, forward-filled) is compared with the
    event price. The difference is multiplied by -1 for side ``'B'`` when
    ``aggressor`` is False and for side ``'A'`` when ``aggressor`` is True,
    scaled by 1e4, and then averaged across events with NaNs ignored.

    Args:
        mbp_data: DataFrame whose index level is named ``ts_event`` and which
            has a ``midprice`` column. When several rows share a timestamp,
            the last non-null midprice of that timestamp is used.
        events: DataFrame indexed by event timestamp with ``price`` and
            ``side`` columns.
        offsets: TimedeltaIndex of offsets applied to each event timestamp.
        aggressor: Selects which side has its sign flipped (see above).

    Returns:
        A single-column DataFrame (column ``0``) with one row per offset,
        indexed by the offset in microseconds. A row is NaN when no event has
        a quote at that offset; every row is NaN when ``events`` is empty.
    """
    n_events, n_offsets = len(events), len(offsets)
    offsets_us = offsets.total_seconds() * 1e6

    if n_events == 0:
        # Nothing to average: return the all-NaN frame directly rather than
        # letting np.nanmean warn about an empty slice.
        return pd.DataFrame(
            data=np.full((n_offsets, 1), np.nan),
            index=offsets_us,
        )

    # Only the midprice is read below, so collapse duplicate timestamps on
    # that column alone instead of aggregating every column of the book data.
    midprice = mbp_data['midprice'].groupby(level='ts_event').last()

    # Side whose raw (midprice - price) difference is negated.
    # NOTE(review): the original comment labelled the non-aggressor 'B' case
    # "Passive sell" - confirm the side convention against the data feed.
    flip_side = 'A' if aggressor else 'B'

    results = np.empty(shape=(n_events, n_offsets, 1))
    rows = zip(events.index, events['price'], events['side'])
    for i, (ts_event, price, side) in enumerate(rows):
        markouts = midprice.reindex(offsets + ts_event, method='ffill') - price
        if side == flip_side:
            markouts *= -1

        # Scale by 1e4 (the plot labels this unit "mils").
        results[i] = (markouts * 1e4).values.reshape(-1, 1)

    # Average over events (axis 0), ignoring offsets with no quote available.
    return pd.DataFrame(
        data=np.nanmean(results, 0),
        index=offsets_us,
    )


def plot_markouts(events, lt_thresholds=(100,), gt_thresholds=(100, 200), aggressor=True, mbp_data=None):
    """Plot mean markout curves for ``events`` bucketed by order size.

    One curve is drawn per threshold: first the events with ``size < t`` for
    each ``t`` in ``lt_thresholds``, then the events with ``size >= t`` for
    each ``t`` in ``gt_thresholds``. Offsets are geometrically spaced from
    1e-3 up to ``WINDOW_LIMITS_US`` microseconds on both sides of the event,
    plus a zero offset. The figure is shown with ``plt.show()``.

    Args:
        events: DataFrame of trade events with a ``size`` column plus the
            columns ``markout`` reads.
        lt_thresholds: Integer thresholds for the "size below" buckets.
        gt_thresholds: Integer thresholds for the "size at or above" buckets.
        aggressor: Forwarded to ``markout``; also selects "trades" versus
            "fills" in the plot title.
        mbp_data: Book data forwarded to ``markout``. When omitted, the
            module-level ``df`` is used, which is how the script entry point
            has always supplied it.

    Raises:
        NameError: If ``mbp_data`` is omitted and no module-level ``df`` exists.
    """
    if mbp_data is None:
        # Backward compatibility: this function used to read the global
        # ``df`` that only exists once the __main__ section has run.
        mbp_data = globals().get('df')
        if mbp_data is None:
            raise NameError(
                "plot_markouts() needs book data: pass mbp_data=... "
                "or define a module-level 'df'"
            )

    samples = np.geomspace(1e-3, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
    offsets = pd.to_timedelta(
        np.append(-np.flip(samples), np.append(0, samples)),
        unit="microseconds",
    )

    # Use linear time steps instead
    #samples = np.linspace(-WINDOW_LIMITS_US, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
    #offsets = pd.to_timedelta(samples, unit="microseconds")

    # "Below threshold" buckets are drawn first, then "at or above" buckets.
    buckets = [("<", threshold) for threshold in lt_thresholds]
    buckets += [(">=", threshold) for threshold in gt_thresholds]

    for op, threshold in buckets:
        if op == "<":
            events_qual = events[events["size"] < threshold]
        else:
            events_qual = events[events["size"] >= threshold]

        plt.plot(
            markout(mbp_data, events_qual, offsets, aggressor=aggressor),
            drawstyle='steps-post',
            label=f"order size {op} {threshold:d} (n={len(events_qual):,d})",
        )

    # Annotations
    plt.title(f"Databento/XNAS.ITCH - {SYMBOL} {'trades' if aggressor else 'fills'} - {DATE.date()}")
    plt.xlabel("Time since event (μs)")
    plt.ylabel("PnL/share (mils)")
    # Symmetric log axis so negative and positive offsets both spread out.
    plt.xscale("symlog")
    plt.grid()
    plt.legend()
    plt.show()


if __name__ == '__main__':
    # Script entry point: download one day of top-of-book data, derive the
    # midprice, then plot markouts per fill and per aggregated trade.

    client = db.Historical()

    mbp_data = client.timeseries.get_range(
        dataset="XNAS.ITCH",
        schema="mbp-1",
        symbols=SYMBOL,
        start=DATE,
        end=DATE + pd.Timedelta(days=1),
    )

    # NOTE: ``df`` must stay a module-level name; plot_markouts() reads it as
    # the book data when no ``mbp_data`` argument is given.
    df = mbp_data.to_df()
    df = df.dropna()
    df = df.set_index("ts_event")

    # Calculate L1 midprice
    df["midprice"] = df[["bid_px_00", "ask_px_00"]].mean(axis=1)

    # Identify trade actions (rows with action "T" and a side other than "N")
    trades = df[(df["action"] == "T") & (df["side"] != "N")]

    plot_markouts(trades, aggressor=False)

    # Per the original note, Nasdaq executions are reported on the passive
    # side, so fills sharing a timestamp are merged into one record: sizes are
    # summed, the side of the first fill is kept, and the price becomes the
    # size-weighted average (total cost / total size).
    # ``assign`` returns a new frame, so nothing is written into a filtered
    # view of ``df`` (avoids pandas' SettingWithCopyWarning).
    trades = trades.assign(cost=trades['price'].mul(trades['size']))
    agg_trades = (
        trades.reset_index()
        .groupby(['ts_event', 'action'], as_index=False)
        .agg({'size': 'sum', 'side': 'first', 'cost': 'sum'})
    )
    agg_trades['price'] = agg_trades['cost'].div(agg_trades['size'])
    agg_trades = agg_trades.set_index('ts_event')

    plot_markouts(agg_trades, aggressor=True)
	import databento as db
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd


	# Session under study: midnight US/Eastern on 2023-06-22. Used as the start of
	# the data request and shown in the plot title.
	DATE = pd.Timestamp(
	    year=2023,
	    month=6,
	    day=22,
	    tz="US/Eastern",
	)
	# Number of geometrically spaced offsets sampled on each side of an event.
	NUM_TIME_SAMPLES = 1_000
	# Ticker requested from the dataset and shown in the plot title.
	SYMBOL = "NVDA"
	# Largest offset before/after an event, in microseconds (120 seconds).
	WINDOW_LIMITS_US = 120 * 1_000_000.0


	# Compute markouts
	def markout(mbp_data, events, offsets, aggressor=False):
	    """Compute the mean signed markout curve of ``events`` against the midprice.

	    For every event, the midprice in effect at ``ts_event + offset`` (the last
	    quote at or before that instant, forward-filled) is compared with the
	    event price. The difference is multiplied by -1 for side ``'B'`` when
	    ``aggressor`` is False and for side ``'A'`` when ``aggressor`` is True,
	    scaled by 1e4, and then averaged across events with NaNs ignored.

	    Args:
	        mbp_data: DataFrame whose index level is named ``ts_event`` and which
	            has a ``midprice`` column. When several rows share a timestamp,
	            the last non-null midprice of that timestamp is used.
	        events: DataFrame indexed by event timestamp with ``price`` and
	            ``side`` columns.
	        offsets: TimedeltaIndex of offsets applied to each event timestamp.
	        aggressor: Selects which side has its sign flipped (see above).

	    Returns:
	        A single-column DataFrame (column ``0``) with one row per offset,
	        indexed by the offset in microseconds. A row is NaN when no event has
	        a quote at that offset; every row is NaN when ``events`` is empty.
	    """
	    n_events, n_offsets = len(events), len(offsets)
	    offsets_us = offsets.total_seconds() * 1e6

	    if n_events == 0:
	        # Nothing to average: return the all-NaN frame directly rather than
	        # letting np.nanmean warn about an empty slice.
	        return pd.DataFrame(
	            data=np.full((n_offsets, 1), np.nan),
	            index=offsets_us,
	        )

	    # Only the midprice is read below, so collapse duplicate timestamps on
	    # that column alone instead of aggregating every column of the book data.
	    midprice = mbp_data['midprice'].groupby(level='ts_event').last()

	    # Side whose raw (midprice - price) difference is negated.
	    # NOTE(review): the original comment labelled the non-aggressor 'B' case
	    # "Passive sell" - confirm the side convention against the data feed.
	    flip_side = 'A' if aggressor else 'B'

	    results = np.empty(shape=(n_events, n_offsets, 1))
	    rows = zip(events.index, events['price'], events['side'])
	    for i, (ts_event, price, side) in enumerate(rows):
	        markouts = midprice.reindex(offsets + ts_event, method='ffill') - price
	        if side == flip_side:
	            markouts *= -1

	        # Scale by 1e4 (the plot labels this unit "mils").
	        results[i] = (markouts * 1e4).values.reshape(-1, 1)

	    # Average over events (axis 0), ignoring offsets with no quote available.
	    return pd.DataFrame(
	        data=np.nanmean(results, 0),
	        index=offsets_us,
	    )


	def plot_markouts(events, lt_thresholds=(100,), gt_thresholds=(100, 200), aggressor=True, mbp_data=None):
	    """Plot mean markout curves for ``events`` bucketed by order size.

	    One curve is drawn per threshold: first the events with ``size < t`` for
	    each ``t`` in ``lt_thresholds``, then the events with ``size >= t`` for
	    each ``t`` in ``gt_thresholds``. Offsets are geometrically spaced from
	    1e-3 up to ``WINDOW_LIMITS_US`` microseconds on both sides of the event,
	    plus a zero offset. The figure is shown with ``plt.show()``.

	    Args:
	        events: DataFrame of trade events with a ``size`` column plus the
	            columns ``markout`` reads.
	        lt_thresholds: Integer thresholds for the "size below" buckets.
	        gt_thresholds: Integer thresholds for the "size at or above" buckets.
	        aggressor: Forwarded to ``markout``; also selects "trades" versus
	            "fills" in the plot title.
	        mbp_data: Book data forwarded to ``markout``. When omitted, the
	            module-level ``df`` is used, which is how the script entry point
	            has always supplied it.

	    Raises:
	        NameError: If ``mbp_data`` is omitted and no module-level ``df`` exists.
	    """
	    if mbp_data is None:
	        # Backward compatibility: this function used to read the global
	        # ``df`` that only exists once the __main__ section has run.
	        mbp_data = globals().get('df')
	        if mbp_data is None:
	            raise NameError(
	                "plot_markouts() needs book data: pass mbp_data=... "
	                "or define a module-level 'df'"
	            )

	    samples = np.geomspace(1e-3, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
	    offsets = pd.to_timedelta(
	        np.append(-np.flip(samples), np.append(0, samples)),
	        unit="microseconds",
	    )

	    # Use linear time steps instead
	    #samples = np.linspace(-WINDOW_LIMITS_US, WINDOW_LIMITS_US, num=NUM_TIME_SAMPLES)
	    #offsets = pd.to_timedelta(samples, unit="microseconds")

	    # "Below threshold" buckets are drawn first, then "at or above" buckets.
	    buckets = [("<", threshold) for threshold in lt_thresholds]
	    buckets += [(">=", threshold) for threshold in gt_thresholds]

	    for op, threshold in buckets:
	        if op == "<":
	            events_qual = events[events["size"] < threshold]
	        else:
	            events_qual = events[events["size"] >= threshold]

	        plt.plot(
	            markout(mbp_data, events_qual, offsets, aggressor=aggressor),
	            drawstyle='steps-post',
	            label=f"order size {op} {threshold:d} (n={len(events_qual):,d})",
	        )

	    # Annotations
	    plt.title(f"Databento/XNAS.ITCH - {SYMBOL} {'trades' if aggressor else 'fills'} - {DATE.date()}")
	    plt.xlabel("Time since event (μs)")
	    plt.ylabel("PnL/share (mils)")
	    # Symmetric log axis so negative and positive offsets both spread out.
	    plt.xscale("symlog")
	    plt.grid()
	    plt.legend()
	    plt.show()


	if __name__ == '__main__':
	    # Script entry point: download one day of top-of-book data, derive the
	    # midprice, then plot markouts per fill and per aggregated trade.

	    client = db.Historical()

	    mbp_data = client.timeseries.get_range(
	        dataset="XNAS.ITCH",
	        schema="mbp-1",
	        symbols=SYMBOL,
	        start=DATE,
	        end=DATE + pd.Timedelta(days=1),
	    )

	    # NOTE: ``df`` must stay a module-level name; plot_markouts() reads it as
	    # the book data when no ``mbp_data`` argument is given.
	    df = mbp_data.to_df()
	    df = df.dropna()
	    df = df.set_index("ts_event")

	    # Calculate L1 midprice
	    df["midprice"] = df[["bid_px_00", "ask_px_00"]].mean(axis=1)

	    # Identify trade actions (rows with action "T" and a side other than "N")
	    trades = df[(df["action"] == "T") & (df["side"] != "N")]

	    plot_markouts(trades, aggressor=False)

	    # Per the original note, Nasdaq executions are reported on the passive
	    # side, so fills sharing a timestamp are merged into one record: sizes are
	    # summed, the side of the first fill is kept, and the price becomes the
	    # size-weighted average (total cost / total size).
	    # ``assign`` returns a new frame, so nothing is written into a filtered
	    # view of ``df`` (avoids pandas' SettingWithCopyWarning).
	    trades = trades.assign(cost=trades['price'].mul(trades['size']))
	    agg_trades = (
	        trades.reset_index()
	        .groupby(['ts_event', 'action'], as_index=False)
	        .agg({'size': 'sum', 'side': 'first', 'cost': 'sum'})
	    )
	    agg_trades['price'] = agg_trades['cost'].div(agg_trades['size'])
	    agg_trades = agg_trades.set_index('ts_event')

	    plot_markouts(agg_trades, aggressor=True)