databento-bot/databento-extract-cme.py

## databento-extract-cme.py
#!/usr/bin/env python
#
# databento-extract-cme.py
#
# Fetch data of options on futures through Databento,
# using COMEX Gold (GC) and CBOT US 5-Year T-Note (ZF) and
# including weeklies, as an example

import databento as db
import itertools


# ZF and GC are listed on CME under several parent symbols once weeklies are
# included. They are spelled out by hand here; the `definition` schema can be
# used instead to fetch them programmatically.
EXPIRATION_WEEKS = ['1', '2', '3', '4', '5']
EXPIRATION_DAYS = ['M', 'T', 'W', 'R']
SYMBOLS = [
    'ZF.FUT',
    'GC.FUT',
    'OZF.OPT',
    # G<week><day>.OPT: one parent per (week, day) pair, week-major order
    *(
        f'G{week}{day}.OPT'
        for week, day in itertools.product(EXPIRATION_WEEKS, EXPIRATION_DAYS)
    ),
    # OG.OPT followed by the numbered OG<week>.OPT parents
    'OG.OPT',
    *(f'OG{week}.OPT' for week in EXPIRATION_WEEKS),
    # ZF<week>.OPT parents
    *(f'ZF{week}.OPT' for week in EXPIRATION_WEEKS),
]

TZ = 'US/Eastern'

client = db.Historical()


# `parent` symbology is a convenience feature on Databento that expands to all
# combinations, expirations, strikes, puts, and calls, without having to specify
# each individual symbol. This example includes trading activity for 9,912
# individual symbols after expanding all futures and options chains. If you prefer
# to look up specific symbols, use `stype_in='raw_symbol'` (default) instead.
def fetch(schema, start, end):
    """Request `schema` records for every entry in SYMBOLS as a DataFrame.

    The request is sent through the module-level `client` to the
    `GLBX.MDP3` dataset with `stype_in='parent'`, and the response is
    converted with `to_df(tz=TZ)`.

    Args:
        schema: Schema name passed to `get_range`, e.g. 'trades' or 'mbp-1'.
        start: Start of the requested range, passed through unchanged.
        end: End of the requested range, passed through unchanged.

    Returns:
        Whatever `to_df(tz=TZ)` returns for the response.
    """
    # Written as a `def` instead of a lambda bound to a name (PEP 8), so the
    # function carries a real name in tracebacks and can hold a docstring.
    response = client.timeseries.get_range(
        dataset='GLBX.MDP3',
        schema=schema,
        stype_in='parent',
        symbols=SYMBOLS,
        start=start,
        end=end,
    )
    return response.to_df(tz=TZ)

# Download trades (i.e. last sale or tick data) for the 2024-02-12 to
# 2024-02-13 range, then write the frame out as both Parquet and CSV.
df = fetch(schema='trades', start='2024-02-12', end='2024-02-13')
df.to_parquet(
    'databento-zf-gc-2024-02-12.trades.parquet.gz',
    compression='gzip',
)
df.to_csv(
    'databento-zf-gc-2024-02-12.trades.csv.gz',
    compression='gzip',
)

# Fetch all top-of-book changes and trades (i.e. L1 or MBP-1)
# For illustration, fetch only 20 minutes around the open since this is a significant
# amount of data. Batch download is recommended for larger amounts of data, see:
# https://databento.com/docs/examples/basics-historical/programmatic-batch-download?historical=python&live=python

# Write the MBP-1 frame to both Parquet and CSV, gzip-compressed.
df = fetch(schema='mbp-1', start='2024-02-12T14:20', end='2024-02-12T14:40')
df.to_parquet(
    'databento-zf-gc-2024-02-12.mbp-1.parquet.gz',
    compression='gzip',
)
df.to_csv(
    'databento-zf-gc-2024-02-12.mbp-1.csv.gz',
    compression='gzip',
)

# To read from file
# import pandas as pd
# df = pd.read_parquet('databento-zf-gc-2024-02-12.trades.parquet.gz')
# df = pd.read_parquet('databento-zf-gc-2024-02-12.mbp-1.parquet.gz')
	#!/usr/bin/env python
	#
	# databento-extract-cme.py
	#
	# Fetch data of options on futures through Databento,
	# using COMEX Gold (GC) and CBOT US 5-Year T-Note (ZF) and
	# including weeklies, as an example

	import databento as db
	import itertools


	# ZF and GC are listed on CME under several parent symbols once weeklies are
	# included. They are spelled out by hand here; the `definition` schema can be
	# used instead to fetch them programmatically.
	EXPIRATION_WEEKS = ['1', '2', '3', '4', '5']
	EXPIRATION_DAYS = ['M', 'T', 'W', 'R']
	SYMBOLS = [
		'ZF.FUT',
		'GC.FUT',
		'OZF.OPT',
		# G<week><day>.OPT: one parent per (week, day) pair, week-major order
		*(
			f'G{week}{day}.OPT'
			for week, day in itertools.product(EXPIRATION_WEEKS, EXPIRATION_DAYS)
		),
		# OG.OPT followed by the numbered OG<week>.OPT parents
		'OG.OPT',
		*(f'OG{week}.OPT' for week in EXPIRATION_WEEKS),
		# ZF<week>.OPT parents
		*(f'ZF{week}.OPT' for week in EXPIRATION_WEEKS),
	]

	TZ = 'US/Eastern'

	client = db.Historical()


	# `parent` symbology is a convenience feature on Databento that expands to all
	# combinations, expirations, strikes, puts, and calls, without having to specify
	# each individual symbol. This example includes trading activity for 9,912
	# individual symbols after expanding all futures and options chains. If you prefer
	# to look up specific symbols, use `stype_in='raw_symbol'` (default) instead.
	def fetch(schema, start, end):
		"""Request `schema` records for every entry in SYMBOLS as a DataFrame.

		The request is sent through the module-level `client` to the
		`GLBX.MDP3` dataset with `stype_in='parent'`, and the response is
		converted with `to_df(tz=TZ)`.

		Args:
			schema: Schema name passed to `get_range`, e.g. 'trades' or 'mbp-1'.
			start: Start of the requested range, passed through unchanged.
			end: End of the requested range, passed through unchanged.

		Returns:
			Whatever `to_df(tz=TZ)` returns for the response.
		"""
		# Written as a `def` instead of a lambda bound to a name (PEP 8), so the
		# function carries a real name in tracebacks and can hold a docstring.
		response = client.timeseries.get_range(
			dataset='GLBX.MDP3',
			schema=schema,
			stype_in='parent',
			symbols=SYMBOLS,
			start=start,
			end=end,
		)
		return response.to_df(tz=TZ)

	# Download trades (i.e. last sale or tick data) for the 2024-02-12 to
	# 2024-02-13 range, then write the frame out as both Parquet and CSV.
	df = fetch(schema='trades', start='2024-02-12', end='2024-02-13')
	df.to_parquet(
		'databento-zf-gc-2024-02-12.trades.parquet.gz',
		compression='gzip',
	)
	df.to_csv(
		'databento-zf-gc-2024-02-12.trades.csv.gz',
		compression='gzip',
	)

	# Fetch all top-of-book changes and trades (i.e. L1 or MBP-1)
	# For illustration, fetch only 20 minutes around the open since this is a significant
	# amount of data. Batch download is recommended for larger amounts of data, see:
	# https://databento.com/docs/examples/basics-historical/programmatic-batch-download?historical=python&live=python

	# Write the MBP-1 frame to both Parquet and CSV, gzip-compressed.
	df = fetch(schema='mbp-1', start='2024-02-12T14:20', end='2024-02-12T14:40')
	df.to_parquet(
		'databento-zf-gc-2024-02-12.mbp-1.parquet.gz',
		compression='gzip',
	)
	df.to_csv(
		'databento-zf-gc-2024-02-12.mbp-1.csv.gz',
		compression='gzip',
	)

	# To read from file
	# import pandas as pd
	# df = pd.read_parquet('databento-zf-gc-2024-02-12.trades.parquet.gz')
	# df = pd.read_parquet('databento-zf-gc-2024-02-12.mbp-1.parquet.gz')