Skip to content

Instantly share code, notes, and snippets.

@dougvk
Created February 8, 2014 20:28
Show Gist options
  • Save dougvk/8889701 to your computer and use it in GitHub Desktop.
Save dougvk/8889701 to your computer and use it in GitHub Desktop.
Ingest, scrub, and store intraday Yahoo Finance ticker information in a pandas DataFrame
"""
Import and process yahoo data
"""
from cStringIO import StringIO
import requests
from datetime import datetime
from pandas import DataFrame
from pandas.io.parsers import read_table
# Accumulates one single-row DataFrame per ticker; filled by the reshape loop
# at the bottom of this script.
ticker_dfs = []
# Label for this data source; not referenced again in the visible part of
# this script.
source_label = 'yahoo_hist'
# `tickers` must already be bound in the namespace this script runs in; the
# lookup raises KeyError otherwise.
# NOTE(review): presumably injected by whatever executes this script -- confirm.
tickers = locals()['tickers']
# Current UTC calendar date, then that same date at 00:00:00 as a naive
# datetime; `today` is used as the index of every per-ticker frame.
d = datetime.utcnow().date()
today = datetime.combine(d, datetime.min.time())
# Column names applied to the downloaded CSV, one per `f=ohgvp` field code
# requested in build_yahoo_hist_url, in the same order.
# NOTE(review): Yahoo's `p` code looks like it meant *previous* close rather
# than the current close -- confirm before relying on the 'close' column.
columns = ['open', 'high', 'low', 'volume', 'close']
# Helper function for 'ingest_yahoo_hist' method
def build_yahoo_hist_url(ticker):
    """Return the Yahoo ``quotes.csv`` download URL for ``ticker``.

    ``ticker`` is inserted verbatim (no URL-escaping) into the ``s=`` query
    parameter, so a caller may pass several symbols already joined with
    ``+`` (e.g. ``'AAPL+MSFT'``).  The ``f=ohgvp`` part of the URL requests
    five fields per symbol.
    """
    url = 'http://download.finance.yahoo.com/d/quotes.csv?s={}&f=ohgvp'
    return url.format(ticker)
# Request the data and store in the original orientation by ticker.
# Yahoo limits requests to 200 tickers, so the list is fetched in batches.
csv_lines = []
for idx in range(0, len(tickers), 200):
    ticker_list = '+'.join(tickers[idx:idx + 200])
    url = build_yahoo_hist_url(ticker_list)
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()
    # Split every response on its own: if a batch does not end with a line
    # break, its last row must not be fused with the next batch's first row.
    csv_lines.extend(response.content.splitlines())
# Normalise the line delimiter to a bare '\n'; the parser below is told to
# split on '\n' only, so '\r\n' endings from the server must be removed.
content = '\n'.join(csv_lines)
# read_table requires a buffer to read from
strio = StringIO(content)
ticker_df = read_table(strio, lineterminator='\n', names=columns, sep=',', index_col=False)
# One CSV row is expected per requested ticker, in request order; pandas
# raises if the number of rows does not match len(tickers).
ticker_df.index = tickers
# Reshape each row of the ticker df into its own per-ticker df with an index of today.
# I'm sure this would've been a good use case for panels but didn't want to bother.
# iterrows() yields (index label, row Series) pairs, which avoids the
# deprecated `.ix` indexer and the manual positional counter.
for name, row in ticker_df.iterrows():
    # A Series converts to a one-column frame; transposing it gives a single
    # row whose columns are the quote fields.  copy() detaches the result
    # from ticker_df.
    df = DataFrame(row).T.copy()
    # Replace the ticker label with today's timestamp as the row index ...
    df.index = [today]
    # ... and keep the ticker symbol as a plain attribute on the frame.
    df.name = name
    ticker_dfs.append(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment