Zipline Google Downloader
@tibkiss · last active October 27, 2018
import os
import glob
import numpy as np
import pandas as pd
from pandas_datareader.data import DataReader
import datetime
import requests
from zipline.utils.calendars import get_calendar
from zipline.data.resample import minute_frame_to_session_frame
from zipline.utils.cli import maybe_show_progress
from zipline.data.bundles import register
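# The ingest loop below keys its download cache with _cachpath(symbol, type_),
# but the gist itself does not define that helper. A minimal sketch is supplied
# here, assumed to mirror the cache-key helper used by zipline's built-in
# bundles: it joins the path-safe symbol and the entry type into one string.
def _cachpath(symbol, type_):
    return '-'.join((symbol.replace(os.path.sep, '_'), type_))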
def google_equities(symbols, start=None, end=None):
    """Build a zipline ingest function that downloads daily OHLCV bars for
    `symbols` from Google Finance via pandas-datareader."""
    symbols = tuple(symbols)

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir,
               # passed as defaults to make them 'nonlocal' in py2
               start=start,
               end=end):
        if start is None:
            start = start_session
        if end is None:
            # Leaving end as None makes DataReader fetch through the most
            # recent available session.
            end = None

        # Per-asset metadata collected while iterating over the symbols.
        metadata = pd.DataFrame(np.empty(len(symbols), dtype=[
            ('start_date', 'datetime64[ns]'),
            ('end_date', 'datetime64[ns]'),
            ('auto_close_date', 'datetime64[ns]'),
            ('symbol', 'object'),
        ]))
        def _pricing_iter():
            sid = 0
            with maybe_show_progress(
                    symbols,
                    show_progress,
                    label='Downloading Google pricing data: ') as it, \
                    requests.Session() as session:
                for symbol in it:
                    path = _cachpath(symbol, 'ohlcv')
                    try:
                        df = cache[path]
                    except KeyError:
                        df = cache[path] = DataReader(
                            symbol,
                            'google',
                            start,
                            end,
                            session=session,
                        ).sort_index()

                    # The start date is the date of the first trade and
                    # the end date is the date of the last trade.
                    start_date = df.index[0]
                    end_date = df.index[-1]
                    # The auto_close date is the day after the last trade.
                    ac_date = end_date + pd.Timedelta(days=1)
                    metadata.iloc[sid] = start_date, end_date, ac_date, symbol

                    df.rename(
                        columns={
                            'Open': 'open',
                            'High': 'high',
                            'Low': 'low',
                            'Close': 'close',
                            'Volume': 'volume',
                        },
                        inplace=True,
                    )
                    yield sid, df
                    sid += 1
        daily_bar_writer.write(_pricing_iter(), show_progress=show_progress)

        # Maps each symbol to the sid assigned above; only needed by the
        # (currently disabled) adjustment download below.
        symbol_map = pd.Series(metadata.symbol.index, metadata.symbol)

        metadata['exchange'] = "GOOGLE"
        asset_db_writer.write(equities=metadata)

        # Disabled for now as Yahoo Finance has changed its API.
        # adjustments = []
        # with maybe_show_progress(
        #         symbols,
        #         show_progress,
        #         label='Downloading Yahoo adjustment data: ') as it, \
        #         requests.Session() as session:
        #     for symbol in it:
        #         path = _cachpath(symbol, 'adjustment')
        #         try:
        #             df = cache[path]
        #         except KeyError:
        #             df = cache[path] = DataReader(
        #                 symbol,
        #                 'yahoo-actions',
        #                 start,
        #                 end,
        #                 session=session,
        #             ).sort_index()
        #
        #         df['sid'] = symbol_map[symbol]
        #         adjustments.append(df)
        #
        # adj_df = pd.concat(adjustments)
        # adj_df.index.name = 'date'
        # adj_df.reset_index(inplace=True)
        #
        # splits = adj_df[adj_df.action == 'SPLIT']
        # splits = splits.rename(
        #     columns={'value': 'ratio', 'date': 'effective_date'},
        # )
        # splits.drop('action', axis=1, inplace=True)
        #
        # dividends = adj_df[adj_df.action == 'DIVIDEND']
        # dividends = dividends.rename(
        #     columns={'value': 'amount', 'date': 'ex_date'},
        # )
        # dividends.drop('action', axis=1, inplace=True)
        # # We do not have this data in the yahoo dataset.
        # dividends['record_date'] = pd.NaT
        # dividends['declared_date'] = pd.NaT
        # dividends['pay_date'] = pd.NaT

        # Write an empty adjustments table while the Yahoo download is disabled.
        adjustment_writer.write()
    return ingest


register('google', google_equities(["VOO", "XIV"]))
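The register call above only takes effect once Zipline imports this module. A minimal usage sketch, assuming this file is saved as ~/.zipline/extension.py (which the Zipline CLI imports before every command) and the bundle has been ingested from the shell with `zipline ingest -b google`:

# Hypothetical usage sketch: after `zipline ingest -b google`, the ingested
# bundle can be loaded and inspected from Python.
from zipline.data.bundles import load

bundle = load('google')
# Look up every asset written by asset_db_writer during ingestion.
assets = bundle.asset_finder.retrieve_all(bundle.asset_finder.sids)
print(assets)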