Skip to content

Instantly share code, notes, and snippets.

@databento-bot
Created August 18, 2023 09:56
Show Gist options
  • Save databento-bot/5f80501873d80e7acba9c78726d4bfe1 to your computer and use it in GitHub Desktop.
import databento as db
import pandas as pd
# Module-level historical-data client; the query functions in this file all go through it.
# NOTE(review): no API key is passed here — presumably it is picked up from the
# environment; confirm against the databento client documentation.
client = db.Historical()
def rank_by_volume(top=500, *, dataset='GLBX.MDP3', start='2023-08-15', end='2023-08-15'):
    """
    Return the instrument IDs that traded the most volume, highest volume first.

    Requests daily OHLCV bars (``ohlcv-1d``) for ``ALL_SYMBOLS`` of ``dataset``
    over the given range and orders the resulting rows by their ``volume``
    column.

    Parameters
    ----------
    top : int
        Maximum number of instrument IDs to return.
    dataset : str
        Dataset code passed to the request.
    start, end : str
        Bounds of the request range, forwarded unchanged to
        ``client.timeseries.get_range``.

    Returns
    -------
    list
        Values of the ``instrument_id`` column, one per returned bar, ordered
        by descending volume and truncated to ``top`` entries.

    Notes
    -----
    No de-duplication is performed: if the request returns more than one bar
    for an instrument, its ID appears more than once.
    NOTE(review): the default ``start`` and ``end`` are the same date; confirm
    in the Databento documentation how an equal start/end date is interpreted.
    """
    data = client.timeseries.get_range(
        dataset=dataset,
        symbols='ALL_SYMBOLS',
        schema='ohlcv-1d',
        start=start,
        end=end,
    )
    bars = data.to_df()
    ranked = bars.sort_values(by='volume', ascending=False)
    # Truncate before converting so only the requested IDs are materialised.
    return ranked['instrument_id'].iloc[:top].tolist()
def get_symbol_mappings(instrument_id_list, *, dataset='GLBX.MDP3',
                        start='2023-08-15', end='2023-08-15'):
    """
    Fetch raw symbols and minimum price increments for the given instrument IDs.

    Requests ``definition`` records for ``instrument_id_list`` (interpreted as
    instrument IDs via ``stype_in='instrument_id'``) and keeps only the columns
    needed to label instruments and normalise prices.

    Parameters
    ----------
    instrument_id_list : list
        Instrument IDs to look up.
    dataset : str
        Dataset code passed to the request.
    start, end : str
        Bounds of the request range, forwarded unchanged to
        ``client.timeseries.get_range``.

    Returns
    -------
    pandas.DataFrame
        Columns ``instrument_id``, ``raw_symbol`` and ``min_price_increment``,
        one row per definition record returned (rows are not de-duplicated).

    Notes
    -----
    NOTE(review): the default ``start`` and ``end`` are the same date; confirm
    in the Databento documentation how an equal start/end date is interpreted.
    """
    data = client.timeseries.get_range(
        dataset=dataset,
        stype_in='instrument_id',
        symbols=instrument_id_list,
        schema='definition',
        start=start,
        end=end,
    )
    definitions = data.to_df()
    wanted_columns = ['instrument_id', 'raw_symbol', 'min_price_increment']
    return definitions[wanted_columns]
def get_median_spread(instrument_id_list, metadata, *, dataset='GLBX.MDP3',
                      start='2023-08-15T15:00', end='2023-08-15T16:00'):
    """
    Compute per-symbol median spread and median touch size at times of execution.

    Requests ``tbbo`` records for ``instrument_id_list``, joins them with
    ``metadata`` on ``instrument_id``, and summarises each ``raw_symbol``.

    Parameters
    ----------
    instrument_id_list : list
        Instrument IDs to request (``stype_in='instrument_id'``).
    metadata : pandas.DataFrame
        Must provide ``instrument_id``, ``raw_symbol`` and
        ``min_price_increment`` columns (the shape returned by
        ``get_symbol_mappings``).
    dataset : str
        Dataset code passed to the request.
    start, end : str
        Bounds of the request window, forwarded unchanged to
        ``client.timeseries.get_range``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``raw_symbol`` with two columns:
        ``median_spread`` - median of ``(ask_px_00 - bid_px_00)`` expressed in
        multiples of ``min_price_increment``, rounded to the nearest integer;
        ``median_touch`` - median of the average of ``bid_sz_00`` and
        ``ask_sz_00``, rounded to one decimal place.
        Rows are ordered by ascending (unrounded) median spread.
    """
    data = client.timeseries.get_range(
        dataset=dataset,
        stype_in='instrument_id',
        symbols=instrument_id_list,
        schema='tbbo',
        start=start,
        end=end,
    )
    # Inner join: records without matching metadata are discarded.
    quotes = data.to_df().merge(metadata, on='instrument_id', how='inner')
    # Rows missing either side of the book or the tick size cannot be measured.
    quotes = quotes.dropna(
        subset=['ask_px_00', 'bid_px_00', 'ask_sz_00', 'bid_sz_00', 'min_price_increment'],
    )

    # Build the per-row quantities once and take grouped medians directly,
    # instead of running a Python lambda over every group's sub-frame. This
    # also yields an empty result (rather than failing) when no rows remain.
    symbol = quotes['raw_symbol']
    spread_in_ticks = (quotes['ask_px_00'] - quotes['bid_px_00']) / quotes['min_price_increment']
    touch_size = (quotes['ask_sz_00'] + quotes['bid_sz_00']) / 2

    # Compute median spread at times of execution, tightest first.
    median_spread = spread_in_ticks.groupby(symbol).median().sort_values().apply(round)
    median_touch = touch_size.groupby(symbol).median().apply(lambda size: round(size, 1))
    # Align explicitly so the output order is the spread order, independent of
    # how concat merges differently-ordered indexes.
    median_touch = median_touch.reindex(median_spread.index)

    ranking = pd.concat([median_spread, median_touch], axis=1)
    ranking.columns = ['median_spread', 'median_touch']
    return ranking
# Pipeline: pick the most-traded instruments, look up their symbols and tick
# sizes, summarise their spreads, then export the table as CSV.
volume_ranking = rank_by_volume(top=500)
metadata = get_symbol_mappings(instrument_id_list=volume_ranking)
spread_ranking = get_median_spread(
    instrument_id_list=volume_ranking,
    metadata=metadata,
)
spread_ranking.to_csv('ranking.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment