Created
September 26, 2017 17:18
-
-
Save tibkiss/2771ed5e80dc33e7a4d72462be1e6ae6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import pandas as pd | |
from pandas_datareader.data import DataReader | |
from pandas_datareader._utils import RemoteDataError | |
import requests | |
from zipline.utils.cli import maybe_show_progress | |
def _cachpath(symbol, type_):
    """Build the cache key for one symbol and one kind of data.

    Path separators inside ``symbol`` are replaced with ``_`` so the key
    never contains a separator, then the result is joined to ``type_``
    with ``-`` (for example ``'AAPL'`` and ``'ohlcv'`` give ``'AAPL-ohlcv'``).

    Parameters
    ----------
    symbol : str
        Ticker symbol; may contain path separator characters.
    type_ : str
        Label for the kind of data being cached.

    Returns
    -------
    str
        ``'<sanitised symbol>-<type_>'``.
    """
    safe_symbol = symbol.replace(os.path.sep, '_')
    # os.path.altsep is None on platforms that have a single separator;
    # where it exists (e.g. '/' on Windows) it must be neutralised as well.
    if os.path.altsep:
        safe_symbol = safe_symbol.replace(os.path.altsep, '_')
    return '-'.join((safe_symbol, type_))
def google_equities(symbols, start=None, end=None):
    """Create an ingest function that loads daily equity data for ``symbols``.

    Pricing is requested from the ``'google'`` pandas-datareader source and
    split/dividend data from the ``'yahoo-actions'`` source.

    Parameters
    ----------
    symbols : iterable of str
        Ticker symbols to load. Materialised into a tuple immediately, so a
        one-shot iterator is fine.
    start : optional
        First date to request pricing for. ``None`` means "use the
        ``start_session`` handed to ``ingest``".
    end : optional
        Last date to request pricing for. ``None`` means "use the
        ``end_session`` handed to ``ingest``".

    Returns
    -------
    callable
        The ``ingest`` closure defined below.
    """
    symbols = tuple(symbols)

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir,
               start=start,
               end=end):
        """Download (or read from ``cache``) the data and hand it to the writers.

        NOTE(review): the positional signature presumably matches what
        zipline's bundle machinery calls -- confirm against the zipline
        version in use. ``environ``, ``minute_bar_writer``, ``calendar`` and
        ``output_dir`` are accepted but not used here.

        Writes, in order: daily bars via ``daily_bar_writer.write``, equity
        metadata via ``asset_db_writer.write`` and splits/dividends via
        ``adjustment_writer.write``.
        """
        # Fall back to the session bounds when no explicit range was given.
        if start is None:
            start = start_session
        if end is None:
            end = end_session

        # One row per symbol; the rows are filled in while _pricing_iter is
        # being consumed by the daily bar writer.
        metadata = pd.DataFrame(np.empty(len(symbols), dtype=[
            ('start_date', 'datetime64[ns]'),
            ('end_date', 'datetime64[ns]'),
            ('auto_close_date', 'datetime64[ns]'),
            ('symbol', 'object'),
        ]))

        def _pricing_iter():
            """Yield ``(sid, frame)`` per symbol and record its metadata row."""
            with maybe_show_progress(
                    symbols,
                    show_progress,
                    label='Downloading Google pricing data: ') as it, \
                    requests.Session() as session:
                # The sid is simply the position of the symbol in ``symbols``.
                for sid, symbol in enumerate(it):
                    path = _cachpath(symbol, 'ohlcv')
                    try:
                        df = cache[path]
                    except KeyError:
                        df = cache[path] = DataReader(
                            symbol,
                            'google',
                            start,
                            end,
                            session=session,
                        ).sort_index()

                    # the start date is the date of the first trade and
                    # the end date is the date of the last trade
                    start_date = df.index[0]
                    end_date = df.index[-1]
                    # The auto_close date is the day after the last trade.
                    ac_date = end_date + pd.Timedelta(days=1)
                    metadata.iloc[sid] = start_date, end_date, ac_date, symbol

                    # Rename on a copy so the object held by the cache is
                    # not mutated behind its back.
                    df = df.rename(
                        columns={
                            'Open': 'open',
                            'High': 'high',
                            'Low': 'low',
                            'Close': 'close',
                            'Volume': 'volume',
                        },
                    )
                    yield sid, df

        daily_bar_writer.write(_pricing_iter(), show_progress=show_progress)

        # symbol -> sid lookup; only valid once the generator above has been
        # fully consumed and ``metadata`` is populated.
        symbol_map = pd.Series(metadata.symbol.index, metadata.symbol)

        metadata['exchange'] = "GOOGLE"
        asset_db_writer.write(equities=metadata)

        adjustments = []
        with maybe_show_progress(
                symbols,
                show_progress,
                label='Downloading Yahoo adjustment data: ') as it, \
                requests.Session() as session:
            for symbol in it:
                path = _cachpath(symbol, 'adjustment')
                sid = symbol_map[symbol]
                try:
                    df = cache[path]
                except KeyError:
                    try:
                        # ``.at`` is the label-based scalar accessor; the old
                        # ``.ix`` indexer no longer exists in pandas >= 1.0.
                        df = cache[path] = DataReader(
                            symbol,
                            'yahoo-actions',
                            metadata.at[sid, 'start_date'],
                            metadata.at[sid, 'end_date'],
                            session=session,
                        ).sort_index()
                    except RemoteDataError:
                        # Best effort: a symbol without adjustment data
                        # contributes an empty frame instead of aborting.
                        print("No data returned from Yahoo for %s" % symbol)
                        df = pd.DataFrame(columns=['value', 'action'])

                df['sid'] = sid
                adjustments.append(df)

        if adjustments:
            adj_df = pd.concat(adjustments)
        else:
            # pd.concat raises ValueError when given no objects at all.
            adj_df = pd.DataFrame(columns=['value', 'action', 'sid'])
        adj_df.index.name = 'date'
        adj_df.reset_index(inplace=True)

        splits = adj_df[adj_df.action == 'SPLIT'].rename(
            columns={'value': 'ratio', 'date': 'effective_date'},
        ).drop('action', axis=1)

        dividends = adj_df[adj_df.action == 'DIVIDEND'].rename(
            columns={'value': 'amount', 'date': 'ex_date'},
        ).drop('action', axis=1)
        # we do not have this data in the yahoo dataset
        dividends['record_date'] = pd.NaT
        dividends['declared_date'] = pd.NaT
        dividends['pay_date'] = pd.NaT

        adjustment_writer.write(splits=splits, dividends=dividends)

    return ingest
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Tibor, thanks for all the help and contributions of yours that I keep finding in various zipline forums. This is currently not working for me, as it appears Google has blocked automated attempts at fetching financial data.
I am really trying to find methods to backtest different streams and aggregate them with live data. Do you have a preferred method of doing this? It seems zipline's old Yahoo and Google scrapers have been deprecated. I'm interested in backtesting and live trading, and happy to contribute to zipline in any way I can.
Best,
Derek