Skip to content

Instantly share code, notes, and snippets.

Created February 8, 2014 20:28
Show Gist options
  • Save dougvk/8889701 to your computer and use it in GitHub Desktop.
Save dougvk/8889701 to your computer and use it in GitHub Desktop.
Ingest, scrub, and store intra-day Yahoo financial ticker information in a pandas DataFrame
Import and process yahoo data
from cStringIO import StringIO
from datetime import datetime

import requests
from pandas import DataFrame
from pandas import read_table
# Module-level state shared by the rest of the script.

# Empty list, presumably meant to collect the per-ticker DataFrames built by
# the reshape loop at the bottom of the script -- TODO confirm.
ticker_dfs = []
# String label for this data source; not referenced again in the visible code.
source_label = 'yahoo_hist'
# NOTE(review): this looks `tickers` up in the current local namespace, so a
# name `tickers` must already exist when this line runs (KeyError otherwise).
# The helper comment below mentions an 'ingest_yahoo_hist' method, so this was
# presumably lifted from a function that received `tickers` -- confirm.
# The code below slices it, takes len() of it and '+'-joins its items.
tickers = locals()['tickers']
# Current UTC calendar date.
d = datetime.utcnow().date()
# Naive datetime at 00:00:00 on that date; used as the index of each
# per-ticker frame.
today = datetime.combine(d, datetime.min.time())
# Column names handed to read_table(names=...).  Presumably ordered to match
# the fields selected by the 'f=ohgvp' query parameter -- TODO confirm.
columns = ['open', 'high', 'low', 'volume', 'close']
# Helper function for 'ingest_yahoo_hist' method
def build_yahoo_hist_url(ticker):
    """Return the quote-request URL for ``ticker``.

    ``ticker`` is substituted verbatim into the template, so a caller may
    pass several symbols that are already joined with ``+``.

    NOTE(review): the template has no scheme or host in front of the
    placeholder -- the base URL appears to be missing and must be restored
    before the result can actually be requested.
    """
    return '{}&f=ohgvp'.format(ticker)
# Request the data and store in the original orientation by ticker.
# Yahoo limits requests to 200 tickers, so the symbols are sent in batches of
# at most 200, each batch '+'-joined into a single request.
chunks = []
for idx in range(0, len(tickers), 200):
    ticker_list = '+'.join(tickers[idx:idx + 200])
    url = build_yahoo_hist_url(ticker_list)
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of splicing an error page into
    # the CSV payload that is parsed further down.
    response.raise_for_status()
    chunks.append(response.content)
# Batches are concatenated back to back, in request order.
content = ''.join(chunks)
# Collapse whatever line endings the response used into a bare '\n', so the
# payload matches the lineterminator handed to read_table below.
content = '\n'.join(content.splitlines())
# read_table reads from a file-like object, so wrap the text in a buffer.
# One row is parsed per line; the rows are then labelled with the ticker
# symbols, in request order.
ticker_df = read_table(
    StringIO(content),
    sep=',',
    names=columns,
    index_col=False,
    lineterminator='\n',
)
ticker_df.index = tickers
# Reshape each row of the ticker df into its own single-row, per-ticker df
# with an index of today.
# I'm sure this would've been a good use case for panels but didn't want to bother.
for position, name in enumerate(ticker_df.index):
    # Select the row by position: the index holds ticker labels, and the
    # label/position-guessing `.ix` accessor is gone from current pandas.
    df = DataFrame(ticker_df.iloc[position]).T.copy()
    df.index = [today]
    # NOTE(review): the original statement here read
    # `df.index = [today] = name`, which cannot work (it would assign the
    # ticker label as the index and unpack it into `today`).  Reconstructed
    # as tagging the frame with its ticker -- confirm against the source.
    df.name = name
    # NOTE(review): nothing in the original stored the reshaped frame;
    # collecting it in `ticker_dfs` is the presumed intent -- confirm.
    ticker_dfs.append(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment