Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created November 14, 2020 08:08
Show Gist options
  • Save hsm207/db00a8102889dfce269c2d66a118071b to your computer and use it in GitHub Desktop.
Save hsm207/db00a8102889dfce269c2d66a118071b to your computer and use it in GitHub Desktop.
Zipline futures bug
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Linux 0a2e3f5e9596 4.19.128-microsoft-standard #1 SMP Tue Jun 23 12:58:10 UTC 2020 x86_64 GNU/Linux\n"
]
}
],
"source": [
"%%bash\n",
"uname --all"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Python 3.6.12\n"
]
}
],
"source": [
"%%bash\n",
"python --version"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"64\n"
]
}
],
"source": [
"%%bash\n",
"python -c 'import math, sys;print(int(math.log(sys.maxsize + 1, 2) + 1))'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"alembic==1.4.3\n",
"appdirs==1.4.4\n",
"argon2-cffi==20.1.0\n",
"async-generator==1.10\n",
"attrs==20.2.0\n",
"backcall==0.2.0\n",
"bcolz==1.2.1\n",
"black==20.8b1\n",
"bleach==3.2.1\n",
"Bottleneck==1.3.2\n",
"cached-property==1.5.2\n",
"certifi==2020.6.20\n",
"cffi==1.14.3\n",
"chardet==3.0.4\n",
"click==7.1.2\n",
"cycler==0.10.0\n",
"dataclasses==0.7\n",
"decorator==4.4.2\n",
"defusedxml==0.6.0\n",
"empyrical==0.5.5\n",
"entrypoints==0.3\n",
"h5py==3.0.0\n",
"idna==2.10\n",
"importlib-metadata==2.0.0\n",
"inflection==0.5.1\n",
"intervaltree==3.1.0\n",
"ipykernel==5.3.4\n",
"ipython==7.16.1\n",
"ipython-genutils==0.2.0\n",
"ipywidgets==7.5.1\n",
"iso3166==1.0.1\n",
"iso4217==1.6.20180829\n",
"jedi==0.17.2\n",
"Jinja2==2.11.2\n",
"joblib==0.17.0\n",
"jsonschema==3.2.0\n",
"jupyter==1.0.0\n",
"jupyter-client==6.1.7\n",
"jupyter-console==6.2.0\n",
"jupyter-core==4.6.3\n",
"jupyterlab-pygments==0.1.2\n",
"kiwisolver==1.2.0\n",
"Logbook==1.5.3\n",
"lru-dict==1.1.6\n",
"lxml==4.6.1\n",
"Mako==1.1.3\n",
"MarkupSafe==1.1.1\n",
"matplotlib==3.3.2\n",
"mistune==0.8.4\n",
"more-itertools==8.6.0\n",
"multipledispatch==0.6.0\n",
"mypy-extensions==0.4.3\n",
"nb-black==1.0.7\n",
"nbclient==0.5.1\n",
"nbconvert==6.0.7\n",
"nbformat==5.0.8\n",
"nest-asyncio==1.4.2\n",
"networkx==1.11\n",
"notebook==6.1.4\n",
"numexpr==2.7.1\n",
"numpy==1.19.3\n",
"packaging==20.4\n",
"pandas==0.22.0\n",
"pandas-datareader==0.8.1\n",
"pandocfilters==1.4.3\n",
"parso==0.7.1\n",
"pathspec==0.8.1\n",
"patsy==0.5.1\n",
"pexpect==4.8.0\n",
"pickleshare==0.7.5\n",
"Pillow==8.0.1\n",
"prometheus-client==0.8.0\n",
"prompt-toolkit==3.0.8\n",
"ptyprocess==0.6.0\n",
"pycparser==2.20\n",
"pyfolio==0.9.2\n",
"Pygments==2.7.2\n",
"pyparsing==2.4.7\n",
"pyrsistent==0.17.3\n",
"python-dateutil==2.8.1\n",
"python-editor==1.0.4\n",
"python-interface==1.6.0\n",
"pytz==2020.1\n",
"pyzmq==19.0.2\n",
"qtconsole==4.7.7\n",
"QtPy==1.9.0\n",
"Quandl==3.5.3\n",
"regex==2020.10.28\n",
"requests==2.24.0\n",
"rope==0.18.0\n",
"scikit-learn==0.23.2\n",
"scipy==1.5.3\n",
"seaborn==0.11.0\n",
"Send2Trash==1.5.0\n",
"six==1.15.0\n",
"sortedcontainers==2.2.2\n",
"SQLAlchemy==1.3.20\n",
"statsmodels==0.12.1\n",
"TA-Lib==0.4.19\n",
"tables==3.6.1\n",
"terminado==0.9.1\n",
"testpath==0.4.4\n",
"threadpoolctl==2.1.0\n",
"toml==0.10.2\n",
"toolz==0.11.1\n",
"tornado==6.1\n",
"tqdm==4.51.0\n",
"trading-calendars==2.0.0\n",
"traitlets==4.3.3\n",
"typed-ast==1.4.1\n",
"typing-extensions==3.7.4.3\n",
"urllib3==1.25.11\n",
"wcwidth==0.2.5\n",
"webencodings==0.5.1\n",
"widgetsnbextension==3.5.1\n",
"# Editable Git install with no remote (zipline==0+unknown)\n",
"-e /zipline\n",
"zipp==3.4.0\n"
]
}
],
"source": [
"%%bash\n",
"\n",
"pip freeze"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n"
]
}
],
"source": [
"%%bash\n",
"whoami"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 2;\n",
" var nbb_unformatted_code = \"import requests\\nimport zipfile\";\n",
" var nbb_formatted_code = \"import requests\\nimport zipfile\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import requests\n",
"import zipfile"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 3;\n",
" var nbb_unformatted_code = \"DATA_FOLDER = \\\"/tmp\\\"\";\n",
" var nbb_formatted_code = \"DATA_FOLDER = \\\"/tmp\\\"\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"DATA_FOLDER = \"/tmp\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download the random futures data from Andreas Clenow's [website](https://www.followingthetrend.com/trading-evolved/):"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 4;\n",
" var nbb_unformatted_code = \"data_url = \\\"https://uc7b8dc0f821c28a93ae6ad0d79a.dl.dropboxusercontent.com/cd/0/get/BDIKliP7oB4WIfgat2JgeOkV93-7bU-4VIAPMehjy9y97JcTe82UPS4L_uFWnwCScUaQ4UubT9WuGEyRAYBHNUokD3PZL9HrR6WyMI3DacV1V10uQzL8TtLAN0OSLcs_7Xc/file\\\"\";\n",
" var nbb_formatted_code = \"data_url = \\\"https://uc7b8dc0f821c28a93ae6ad0d79a.dl.dropboxusercontent.com/cd/0/get/BDIKliP7oB4WIfgat2JgeOkV93-7bU-4VIAPMehjy9y97JcTe82UPS4L_uFWnwCScUaQ4UubT9WuGEyRAYBHNUokD3PZL9HrR6WyMI3DacV1V10uQzL8TtLAN0OSLcs_7Xc/file\\\"\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data_url = \"https://uc7b8dc0f821c28a93ae6ad0d79a.dl.dropboxusercontent.com/cd/0/get/BDIKliP7oB4WIfgat2JgeOkV93-7bU-4VIAPMehjy9y97JcTe82UPS4L_uFWnwCScUaQ4UubT9WuGEyRAYBHNUokD3PZL9HrR6WyMI3DacV1V10uQzL8TtLAN0OSLcs_7Xc/file\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 5;\n",
" var nbb_unformatted_code = \"r = requests.get(data_url)\\nassert r.ok\";\n",
" var nbb_formatted_code = \"r = requests.get(data_url)\\nassert r.ok\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"r = requests.get(data_url)\n",
"assert r.ok"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 6;\n",
" var nbb_unformatted_code = \"with open(f\\\"{DATA_FOLDER}/data.zip\\\", \\\"wb\\\") as f:\\n f.write(r.content)\\n\\nwith zipfile.ZipFile(f\\\"{DATA_FOLDER}/data.zip\\\", \\\"r\\\") as zip_ref:\\n zip_ref.extractall(DATA_FOLDER)\";\n",
" var nbb_formatted_code = \"with open(f\\\"{DATA_FOLDER}/data.zip\\\", \\\"wb\\\") as f:\\n f.write(r.content)\\n\\nwith zipfile.ZipFile(f\\\"{DATA_FOLDER}/data.zip\\\", \\\"r\\\") as zip_ref:\\n zip_ref.extractall(DATA_FOLDER)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"with open(f\"{DATA_FOLDER}/data.zip\", \"wb\") as f:\n",
" f.write(r.content)\n",
"\n",
"with zipfile.ZipFile(f\"{DATA_FOLDER}/data.zip\", \"r\") as zip_ref:\n",
" zip_ref.extractall(DATA_FOLDER)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write the ingest script for the random futures bundle to `~/.zipline/random_futures_data.py`:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting /root/.zipline/random_futures_data.py\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 7;\n",
" var nbb_unformatted_code = \"%%writefile ~/.zipline/random_futures_data.py\\n\\nimport pandas as pd\\nfrom os import listdir\\nfrom tqdm import tqdm # Used for progress bar\\n\\n# Change the path to where you have your data\\nbase_path = \\\"/tmp/data/\\\"\\ndata_path = base_path + \\\"random_futures/\\\"\\nmeta_path = \\\"futures_meta/meta.csv\\\"\\nfutures_lookup = pd.read_csv(base_path + meta_path, index_col=0)\\n\\n\\\"\\\"\\\"\\nThe ingest function needs to have this exact signature,\\nmeaning these arguments passed, as shown below.\\n\\\"\\\"\\\"\\n\\n\\ndef random_futures_data(\\n environ,\\n asset_db_writer,\\n minute_bar_writer,\\n daily_bar_writer,\\n adjustment_writer,\\n calendar,\\n start_session,\\n end_session,\\n cache,\\n show_progress,\\n output_dir,\\n):\\n\\n # Get list of files from path\\n # Slicing off the last part\\n # 'example.csv'[:-4] = 'example'\\n symbols = [f[:-4] for f in listdir(data_path)]\\n\\n if not symbols:\\n raise ValueError(\\\"No symbols found in folder.\\\")\\n\\n # Prepare an empty DataFrame for dividends\\n divs = pd.DataFrame(\\n columns=[\\\"sid\\\", \\\"amount\\\", \\\"ex_date\\\", \\\"record_date\\\", \\\"declared_date\\\", \\\"pay_date\\\"]\\n )\\n\\n # Prepare an empty DataFrame for splits\\n splits = pd.DataFrame(columns=[\\\"sid\\\", \\\"ratio\\\", \\\"effective_date\\\"])\\n\\n # Prepare an empty DataFrame for metadata\\n metadata = pd.DataFrame(\\n columns=(\\n \\\"start_date\\\",\\n \\\"end_date\\\",\\n \\\"auto_close_date\\\",\\n \\\"symbol\\\",\\n \\\"root_symbol\\\",\\n \\\"expiration_date\\\",\\n \\\"notice_date\\\",\\n \\\"tick_size\\\",\\n \\\"exchange\\\",\\n )\\n )\\n\\n # Check valid trading dates, according to the selected exchange calendar\\n sessions = calendar.sessions_in_range(start_session, end_session)\\n\\n # Get data for all stocks and write to Zipline\\n daily_bar_writer.write(process_futures(symbols, sessions, metadata))\\n\\n adjustment_writer.write(splits=splits, dividends=divs)\\n\\n # 
Prepare root level metadata\\n root_symbols = futures_lookup.copy()\\n root_symbols[\\\"root_symbol_id\\\"] = root_symbols.index.values\\n del root_symbols[\\\"minor_fx_adj\\\"]\\n\\n # write the meta data\\n asset_db_writer.write(futures=metadata, root_symbols=root_symbols)\\n\\n\\ndef process_futures(symbols, sessions, metadata):\\n # Loop the stocks, setting a unique Security ID (SID)\\n sid = 0\\n\\n # Loop the symbols with progress bar, using tqdm\\n for symbol in tqdm(symbols, desc=\\\"Loading data...\\\"):\\n sid += 1\\n\\n # Read the stock data from csv file.\\n df = pd.read_csv(\\n \\\"{}/{}.csv\\\".format(data_path, symbol), index_col=[0], parse_dates=[0]\\n )\\n\\n # Check for minor currency quotes\\n adjustment_factor = futures_lookup.loc[\\n futures_lookup[\\\"root_symbol\\\"] == df.iloc[0][\\\"root_symbol\\\"]\\n ][\\\"minor_fx_adj\\\"].iloc[0]\\n\\n df[\\\"open\\\"] *= adjustment_factor\\n df[\\\"high\\\"] *= adjustment_factor\\n df[\\\"low\\\"] *= adjustment_factor\\n df[\\\"close\\\"] *= adjustment_factor\\n\\n # Avoid potential high / low data errors in data set\\n # And apply minor currency adjustment for USc quotes\\n df[\\\"high\\\"] = df[[\\\"high\\\", \\\"close\\\"]].max(axis=1)\\n df[\\\"low\\\"] = df[[\\\"low\\\", \\\"close\\\"]].min(axis=1)\\n df[\\\"high\\\"] = df[[\\\"high\\\", \\\"open\\\"]].max(axis=1)\\n df[\\\"low\\\"] = df[[\\\"low\\\", \\\"open\\\"]].min(axis=1)\\n\\n # Synch to the official exchange calendar\\n df = df.reindex(sessions.tz_localize(None))[df.index[0] : df.index[-1]]\\n\\n # Forward fill missing data\\n df.fillna(method=\\\"ffill\\\", inplace=True)\\n\\n # Drop remaining NaN\\n df.dropna(inplace=True)\\n\\n # Cut dates before 2000, avoiding Zipline issue\\n df = df[\\\"2000-01-01\\\":]\\n\\n # Prepare contract metadata\\n make_meta(sid, metadata, df, sessions)\\n\\n del df[\\\"openinterest\\\"]\\n del df[\\\"expiration_date\\\"]\\n del df[\\\"root_symbol\\\"]\\n del df[\\\"symbol\\\"]\\n\\n yield sid, df\\n\\n\\ndef 
make_meta(sid, metadata, df, sessions):\\n # Check first and last date.\\n start_date = df.index[0]\\n end_date = df.index[-1]\\n\\n # The auto_close date is the day after the last trade.\\n ac_date = end_date + pd.Timedelta(days=1)\\n\\n symbol = df.iloc[0][\\\"symbol\\\"]\\n root_sym = df.iloc[0][\\\"root_symbol\\\"]\\n exchng = futures_lookup.loc[futures_lookup[\\\"root_symbol\\\"] == root_sym][\\n \\\"exchange\\\"\\n ].iloc[0]\\n exp_date = end_date\\n\\n # Add notice day if you have.\\n # Tip to improve: Set notice date to one month prior to\\n # expiry for commodity markets.\\n notice_date = ac_date\\n tick_size = 0.0001 # Placeholder\\n\\n # Add a row to the metadata DataFrame.\\n metadata.loc[sid] = (\\n start_date,\\n end_date,\\n ac_date,\\n symbol,\\n root_sym,\\n exp_date,\\n notice_date,\\n tick_size,\\n exchng,\\n )\";\n",
" var nbb_formatted_code = \"%%writefile ~/.zipline/random_futures_data.py\\n\\nimport pandas as pd\\nfrom os import listdir\\nfrom tqdm import tqdm # Used for progress bar\\n\\n# Change the path to where you have your data\\nbase_path = \\\"/tmp/data/\\\"\\ndata_path = base_path + \\\"random_futures/\\\"\\nmeta_path = \\\"futures_meta/meta.csv\\\"\\nfutures_lookup = pd.read_csv(base_path + meta_path, index_col=0)\\n\\n\\\"\\\"\\\"\\nThe ingest function needs to have this exact signature,\\nmeaning these arguments passed, as shown below.\\n\\\"\\\"\\\"\\n\\n\\ndef random_futures_data(\\n environ,\\n asset_db_writer,\\n minute_bar_writer,\\n daily_bar_writer,\\n adjustment_writer,\\n calendar,\\n start_session,\\n end_session,\\n cache,\\n show_progress,\\n output_dir,\\n):\\n\\n # Get list of files from path\\n # Slicing off the last part\\n # 'example.csv'[:-4] = 'example'\\n symbols = [f[:-4] for f in listdir(data_path)]\\n\\n if not symbols:\\n raise ValueError(\\\"No symbols found in folder.\\\")\\n\\n # Prepare an empty DataFrame for dividends\\n divs = pd.DataFrame(\\n columns=[\\\"sid\\\", \\\"amount\\\", \\\"ex_date\\\", \\\"record_date\\\", \\\"declared_date\\\", \\\"pay_date\\\"]\\n )\\n\\n # Prepare an empty DataFrame for splits\\n splits = pd.DataFrame(columns=[\\\"sid\\\", \\\"ratio\\\", \\\"effective_date\\\"])\\n\\n # Prepare an empty DataFrame for metadata\\n metadata = pd.DataFrame(\\n columns=(\\n \\\"start_date\\\",\\n \\\"end_date\\\",\\n \\\"auto_close_date\\\",\\n \\\"symbol\\\",\\n \\\"root_symbol\\\",\\n \\\"expiration_date\\\",\\n \\\"notice_date\\\",\\n \\\"tick_size\\\",\\n \\\"exchange\\\",\\n )\\n )\\n\\n # Check valid trading dates, according to the selected exchange calendar\\n sessions = calendar.sessions_in_range(start_session, end_session)\\n\\n # Get data for all stocks and write to Zipline\\n daily_bar_writer.write(process_futures(symbols, sessions, metadata))\\n\\n adjustment_writer.write(splits=splits, dividends=divs)\\n\\n # 
Prepare root level metadata\\n root_symbols = futures_lookup.copy()\\n root_symbols[\\\"root_symbol_id\\\"] = root_symbols.index.values\\n del root_symbols[\\\"minor_fx_adj\\\"]\\n\\n # write the meta data\\n asset_db_writer.write(futures=metadata, root_symbols=root_symbols)\\n\\n\\ndef process_futures(symbols, sessions, metadata):\\n # Loop the stocks, setting a unique Security ID (SID)\\n sid = 0\\n\\n # Loop the symbols with progress bar, using tqdm\\n for symbol in tqdm(symbols, desc=\\\"Loading data...\\\"):\\n sid += 1\\n\\n # Read the stock data from csv file.\\n df = pd.read_csv(\\n \\\"{}/{}.csv\\\".format(data_path, symbol), index_col=[0], parse_dates=[0]\\n )\\n\\n # Check for minor currency quotes\\n adjustment_factor = futures_lookup.loc[\\n futures_lookup[\\\"root_symbol\\\"] == df.iloc[0][\\\"root_symbol\\\"]\\n ][\\\"minor_fx_adj\\\"].iloc[0]\\n\\n df[\\\"open\\\"] *= adjustment_factor\\n df[\\\"high\\\"] *= adjustment_factor\\n df[\\\"low\\\"] *= adjustment_factor\\n df[\\\"close\\\"] *= adjustment_factor\\n\\n # Avoid potential high / low data errors in data set\\n # And apply minor currency adjustment for USc quotes\\n df[\\\"high\\\"] = df[[\\\"high\\\", \\\"close\\\"]].max(axis=1)\\n df[\\\"low\\\"] = df[[\\\"low\\\", \\\"close\\\"]].min(axis=1)\\n df[\\\"high\\\"] = df[[\\\"high\\\", \\\"open\\\"]].max(axis=1)\\n df[\\\"low\\\"] = df[[\\\"low\\\", \\\"open\\\"]].min(axis=1)\\n\\n # Synch to the official exchange calendar\\n df = df.reindex(sessions.tz_localize(None))[df.index[0] : df.index[-1]]\\n\\n # Forward fill missing data\\n df.fillna(method=\\\"ffill\\\", inplace=True)\\n\\n # Drop remaining NaN\\n df.dropna(inplace=True)\\n\\n # Cut dates before 2000, avoiding Zipline issue\\n df = df[\\\"2000-01-01\\\":]\\n\\n # Prepare contract metadata\\n make_meta(sid, metadata, df, sessions)\\n\\n del df[\\\"openinterest\\\"]\\n del df[\\\"expiration_date\\\"]\\n del df[\\\"root_symbol\\\"]\\n del df[\\\"symbol\\\"]\\n\\n yield sid, df\\n\\n\\ndef 
make_meta(sid, metadata, df, sessions):\\n # Check first and last date.\\n start_date = df.index[0]\\n end_date = df.index[-1]\\n\\n # The auto_close date is the day after the last trade.\\n ac_date = end_date + pd.Timedelta(days=1)\\n\\n symbol = df.iloc[0][\\\"symbol\\\"]\\n root_sym = df.iloc[0][\\\"root_symbol\\\"]\\n exchng = futures_lookup.loc[futures_lookup[\\\"root_symbol\\\"] == root_sym][\\n \\\"exchange\\\"\\n ].iloc[0]\\n exp_date = end_date\\n\\n # Add notice day if you have.\\n # Tip to improve: Set notice date to one month prior to\\n # expiry for commodity markets.\\n notice_date = ac_date\\n tick_size = 0.0001 # Placeholder\\n\\n # Add a row to the metadata DataFrame.\\n metadata.loc[sid] = (\\n start_date,\\n end_date,\\n ac_date,\\n symbol,\\n root_sym,\\n exp_date,\\n notice_date,\\n tick_size,\\n exchng,\\n )\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%writefile ~/.zipline/random_futures_data.py\n",
"\n",
"import pandas as pd\n",
"from os import listdir\n",
"from tqdm import tqdm # Used for progress bar\n",
"\n",
"# Change the path to where you have your data\n",
"base_path = \"/tmp/data/\"\n",
"data_path = base_path + \"random_futures/\"\n",
"meta_path = \"futures_meta/meta.csv\"\n",
"futures_lookup = pd.read_csv(base_path + meta_path, index_col=0)\n",
"\n",
"\"\"\"\n",
"The ingest function needs to have this exact signature,\n",
"meaning these arguments passed, as shown below.\n",
"\"\"\"\n",
"\n",
"\n",
"def random_futures_data(\n",
" environ,\n",
" asset_db_writer,\n",
" minute_bar_writer,\n",
" daily_bar_writer,\n",
" adjustment_writer,\n",
" calendar,\n",
" start_session,\n",
" end_session,\n",
" cache,\n",
" show_progress,\n",
" output_dir,\n",
"):\n",
"\n",
" # Get list of files from path\n",
" # Slicing off the last part\n",
" # 'example.csv'[:-4] = 'example'\n",
" symbols = [f[:-4] for f in listdir(data_path)]\n",
"\n",
" if not symbols:\n",
" raise ValueError(\"No symbols found in folder.\")\n",
"\n",
" # Prepare an empty DataFrame for dividends\n",
" divs = pd.DataFrame(\n",
" columns=[\"sid\", \"amount\", \"ex_date\", \"record_date\", \"declared_date\", \"pay_date\"]\n",
" )\n",
"\n",
" # Prepare an empty DataFrame for splits\n",
" splits = pd.DataFrame(columns=[\"sid\", \"ratio\", \"effective_date\"])\n",
"\n",
" # Prepare an empty DataFrame for metadata\n",
" metadata = pd.DataFrame(\n",
" columns=(\n",
" \"start_date\",\n",
" \"end_date\",\n",
" \"auto_close_date\",\n",
" \"symbol\",\n",
" \"root_symbol\",\n",
" \"expiration_date\",\n",
" \"notice_date\",\n",
" \"tick_size\",\n",
" \"exchange\",\n",
" )\n",
" )\n",
"\n",
" # Check valid trading dates, according to the selected exchange calendar\n",
" sessions = calendar.sessions_in_range(start_session, end_session)\n",
"\n",
" # Get data for all stocks and write to Zipline\n",
" daily_bar_writer.write(process_futures(symbols, sessions, metadata))\n",
"\n",
" adjustment_writer.write(splits=splits, dividends=divs)\n",
"\n",
" # Prepare root level metadata\n",
" root_symbols = futures_lookup.copy()\n",
" root_symbols[\"root_symbol_id\"] = root_symbols.index.values\n",
" del root_symbols[\"minor_fx_adj\"]\n",
"\n",
" # write the meta data\n",
" asset_db_writer.write(futures=metadata, root_symbols=root_symbols)\n",
"\n",
"\n",
"def process_futures(symbols, sessions, metadata):\n",
" # Loop the stocks, setting a unique Security ID (SID)\n",
" sid = 0\n",
"\n",
" # Loop the symbols with progress bar, using tqdm\n",
" for symbol in tqdm(symbols, desc=\"Loading data...\"):\n",
" sid += 1\n",
"\n",
" # Read the stock data from csv file.\n",
" df = pd.read_csv(\n",
" \"{}/{}.csv\".format(data_path, symbol), index_col=[0], parse_dates=[0]\n",
" )\n",
"\n",
" # Check for minor currency quotes\n",
" adjustment_factor = futures_lookup.loc[\n",
" futures_lookup[\"root_symbol\"] == df.iloc[0][\"root_symbol\"]\n",
" ][\"minor_fx_adj\"].iloc[0]\n",
"\n",
" df[\"open\"] *= adjustment_factor\n",
" df[\"high\"] *= adjustment_factor\n",
" df[\"low\"] *= adjustment_factor\n",
" df[\"close\"] *= adjustment_factor\n",
"\n",
" # Avoid potential high / low data errors in data set\n",
" # And apply minor currency adjustment for USc quotes\n",
" df[\"high\"] = df[[\"high\", \"close\"]].max(axis=1)\n",
" df[\"low\"] = df[[\"low\", \"close\"]].min(axis=1)\n",
" df[\"high\"] = df[[\"high\", \"open\"]].max(axis=1)\n",
" df[\"low\"] = df[[\"low\", \"open\"]].min(axis=1)\n",
"\n",
" # Synch to the official exchange calendar\n",
" df = df.reindex(sessions.tz_localize(None))[df.index[0] : df.index[-1]]\n",
"\n",
" # Forward fill missing data\n",
" df.fillna(method=\"ffill\", inplace=True)\n",
"\n",
" # Drop remaining NaN\n",
" df.dropna(inplace=True)\n",
"\n",
" # Cut dates before 2000, avoiding Zipline issue\n",
" df = df[\"2000-01-01\":]\n",
"\n",
" # Prepare contract metadata\n",
" make_meta(sid, metadata, df, sessions)\n",
"\n",
" del df[\"openinterest\"]\n",
" del df[\"expiration_date\"]\n",
" del df[\"root_symbol\"]\n",
" del df[\"symbol\"]\n",
"\n",
" yield sid, df\n",
"\n",
"\n",
"def make_meta(sid, metadata, df, sessions):\n",
" # Check first and last date.\n",
" start_date = df.index[0]\n",
" end_date = df.index[-1]\n",
"\n",
" # The auto_close date is the day after the last trade.\n",
" ac_date = end_date + pd.Timedelta(days=1)\n",
"\n",
" symbol = df.iloc[0][\"symbol\"]\n",
" root_sym = df.iloc[0][\"root_symbol\"]\n",
" exchng = futures_lookup.loc[futures_lookup[\"root_symbol\"] == root_sym][\n",
" \"exchange\"\n",
" ].iloc[0]\n",
" exp_date = end_date\n",
"\n",
" # Add notice day if you have.\n",
" # Tip to improve: Set notice date to one month prior to\n",
" # expiry for commodity markets.\n",
" notice_date = ac_date\n",
" tick_size = 0.0001 # Placeholder\n",
"\n",
" # Add a row to the metadata DataFrame.\n",
" metadata.loc[sid] = (\n",
" start_date,\n",
" end_date,\n",
" ac_date,\n",
" symbol,\n",
" root_sym,\n",
" exp_date,\n",
" notice_date,\n",
" tick_size,\n",
" exchng,\n",
" )\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Append the registration code to `~/.zipline/extension.py` so that Zipline registers this bundle:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Appending to /root/.zipline/extension.py\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 8;\n",
" var nbb_unformatted_code = \"%%writefile -a ~/.zipline/extension.py\\n\\n\\nfrom zipline.data.bundles import register\\n\\nimport random_futures_data\\nregister('random_futures', random_futures_data.random_futures_data, calendar_name='us_futures')\";\n",
" var nbb_formatted_code = \"%%writefile -a ~/.zipline/extension.py\\n\\n\\nfrom zipline.data.bundles import register\\n\\nimport random_futures_data\\nregister('random_futures', random_futures_data.random_futures_data, calendar_name='us_futures')\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%writefile -a ~/.zipline/extension.py\n",
"\n",
"\n",
"from zipline.data.bundles import register\n",
"\n",
"import random_futures_data\n",
"register('random_futures', random_futures_data.random_futures_data, calendar_name='us_futures')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ingest the bundle:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"fatal: not a git repository (or any of the parent directories): .git\n",
"<string>:12: UserWarning: Overwriting bundle with name 'random_futures'\n",
"[2020-11-14 05:31:39.579842] INFO: zipline.data.bundles.core: Ingesting random_futures.\n",
"Loading data...: 100%|██████████| 735/735 [04:16<00:00, 2.87it/s]\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 9;\n",
" var nbb_unformatted_code = \"%%bash\\n\\nPYTHONPATH=\\\"$HOME/.zipline\\\" zipline ingest -b 'random_futures'\";\n",
" var nbb_formatted_code = \"%%bash\\n\\nPYTHONPATH=\\\"$HOME/.zipline\\\" zipline ingest -b 'random_futures'\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%bash\n",
"\n",
"PYTHONPATH=\"$HOME/.zipline\" zipline ingest -b 'random_futures'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Check that the bundle exists by listing the ingested bundles:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"csvdir <no ingestions>\n",
"quandl <no ingestions>\n",
"quantopian-quandl <no ingestions>\n",
"random_futures 2020-11-14 05:31:38.242183\n",
"random_futures 2020-11-14 04:44:32.249879\n",
"random_futures 2020-11-14 04:44:06.346443\n",
"random_futures 2020-11-14 03:46:45.976588\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"fatal: not a git repository (or any of the parent directories): .git\n",
"<string>:12: UserWarning: Overwriting bundle with name 'random_futures'\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 10;\n",
" var nbb_unformatted_code = \"%%bash\\n\\nzipline bundles\";\n",
" var nbb_formatted_code = \"%%bash\\n\\nzipline bundles\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%bash\n",
"\n",
"zipline bundles"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment