Last active
October 27, 2018 22:44
-
-
Save tibkiss/e00ba8a9d762f699fb58190cd1c1418e to your computer and use it in GitHub Desktop.
Zipline Google Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import glob | |
import numpy as np | |
import pandas as pd | |
from pandas_datareader.data import DataReader | |
import datetime | |
import requests | |
from zipline.utils.calendars import get_calendar | |
from zipline.data.resample import minute_frame_to_session_frame | |
from zipline.utils.cli import maybe_show_progress | |
from zipline.data.bundles import register | |
def _cachpath(symbol, type_):
    """Return the cache key used for ``symbol``'s data of kind ``type_``.

    Any path separator in the symbol is replaced with an underscore so the
    resulting key never contains a directory separator.
    """
    return '-'.join((symbol.replace(os.path.sep, '_'), type_))


def google_equities(symbols, start=None, end=None):
    """Create a data bundle ingest function for a set of symbols.

    Daily pricing is loaded through ``DataReader`` using the ``'google'``
    data source.

    NOTE(review): the ``'google'`` source has been deprecated in newer
    pandas-datareader releases -- confirm the installed version still
    supports it.

    Parameters
    ----------
    symbols : iterable[str]
        The ticker symbols to load data for.
    start : datetime, optional
        The first date to query for. Defaults to the ``start_session``
        passed to the ingest function.
    end : datetime, optional
        The last date to query for. Defaults to the ``end_session``
        passed to the ingest function.

    Returns
    -------
    ingest : callable
        The bundle ingest function for the given set of symbols.
    """
    # Materialize once: the symbols are iterated and also measured with
    # ``len`` inside ``ingest``.
    symbols = tuple(symbols)

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir,
               # Bound as defaults so the enclosing values can be rebound
               # locally below.
               start=start,
               end=end):
        """Download daily bars for ``symbols`` and write the bundle.

        ``environ``, ``minute_bar_writer``, ``calendar`` and ``output_dir``
        are accepted but not used by this function.
        """
        if start is None:
            start = start_session
        if end is None:
            end = end_session

        metadata = pd.DataFrame(np.empty(len(symbols), dtype=[
            ('start_date', 'datetime64[ns]'),
            ('end_date', 'datetime64[ns]'),
            ('auto_close_date', 'datetime64[ns]'),
            ('symbol', 'object'),
        ]))

        def _pricing_iter():
            """Yield ``(sid, frame)`` pairs and fill ``metadata`` row by row."""
            with maybe_show_progress(
                    symbols,
                    show_progress,
                    label='Downloading Google pricing data: ') as it, \
                    requests.Session() as session:
                for sid, symbol in enumerate(it):
                    path = _cachpath(symbol, 'ohlcv')
                    try:
                        df = cache[path]
                    except KeyError:
                        # Not cached yet: download and store for next time.
                        df = cache[path] = DataReader(
                            symbol,
                            'google',
                            start,
                            end,
                            session=session,
                        ).sort_index()

                    # The start date is the date of the first trade and
                    # the end date is the date of the last trade.
                    start_date = df.index[0]
                    end_date = df.index[-1]
                    # The auto_close date is the day after the last trade.
                    ac_date = end_date + pd.Timedelta(days=1)
                    metadata.iloc[sid] = start_date, end_date, ac_date, symbol

                    df.rename(
                        columns={
                            'Open': 'open',
                            'High': 'high',
                            'Low': 'low',
                            'Close': 'close',
                            'Volume': 'volume',
                        },
                        inplace=True,
                    )
                    yield sid, df

        # ``metadata`` is populated as a side effect of consuming the
        # generator, so the daily bars must be written before the asset db.
        daily_bar_writer.write(_pricing_iter(), show_progress=show_progress)

        metadata['exchange'] = "GOOGLE"
        asset_db_writer.write(equities=metadata)

        # Split and dividend adjustments are not downloaded: the Yahoo-based
        # adjustment download was disabled because Yahoo Finance changed its
        # API. ``write`` is called without arguments, so no splits or
        # dividends are passed to the adjustment writer.
        adjustment_writer.write()

    return ingest
# Register the bundle under the name 'google', ingesting the VOO and XIV
# tickers with the default start/end dates.
register(
    'google',
    google_equities(["VOO", "XIV"]),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment