Skip to content

Instantly share code, notes, and snippets.

@ivanliu
Created July 30, 2017 23:47
Show Gist options
  • Save ivanliu/0e42e97ae06e99a9b04ecb8cc4f889b8 to your computer and use it in GitHub Desktop.
Save ivanliu/0e42e97ae06e99a9b04ecb8cc4f889b8 to your computer and use it in GitHub Desktop.
fetch stock price in batch mode
'''
scrape all history stock Data
'''
import datetime
import pandas_datareader.data as web
from dateutil.relativedelta import relativedelta
import mysql.connector
import xml.etree.ElementTree as ET
import pandas as pd
import os
def get_data(symbol):
start = datetime.datetime(2000, 01, 01);
end = datetime.datetime.now();
print 'scraping [%s]' % symbol
# read the stock information from Google finance
if symbol[0] == '^' or "VIX" in symbol : # we have to call Stooq reader
df = web.DataReader(symbol, 'stooq')
else:
df = web.DataReader(symbol, 'yahoo', start, end)
# for backward compatible we have to add column called adj. price
return df
'''Main Function'''
def main():
os.chdir("/home/ec2-user/data/stock_price")
# init mysql connection
# mysql -h sfdata.cf6ulue4mzq9.us-west-2.rds.amazonaws.com -P 3306 -u spr1ngf0rward -p
cnx = mysql.connector.connect(user='spr1ngf0rward', password='DTXpecgdTQzijTMg2',
host='sfdata.cf6ulue4mzq9.us-west-2.rds.amazonaws.com',
database='sfdata')
# queries
get_symbols = ("SELECT symbol_id, symbol FROM symbol WHERE exchange_id in (1,2,24)")
Insert_Price = ("INSERT INTO stock_prices"
"(symbol, symbol_id, date, open, high, low, close, adj_close, volume, created_on)"
"VALUES (%(symbol)s, %(symbol_id)s, %(date)s, %(open)s, %(high)s, %(low)s, %(close)s, %(adj_close)s, %(volume)s, %(created_on)s)")
symbols = []
## get symbol list
cursor = cnx.cursor()
cursor.execute(get_symbols)
for (symbol_id, symbol) in cursor:
symbols.append((symbol_id, symbol))
cursor.close()
# insert stock price into database
output = 'stock_prices.csv'
# debug
# symbols = [(1, 'AABA'), (2, 'YHOO'), (3, 'BABA')]
for (symbol_id, symbol) in symbols:
try:
data=get_data(symbol)
data["symbol"] = symbol
data["symbol_id"] = symbol_id
data["date"] =data.index
data["created_on"]= datetime.datetime.now().date()
order = ['symbol', 'symbol_id', 'date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'created_on']
data = data.ix[:, order]
data.to_csv(output, index = False, mode = 'a', header = False)
print ("download %s success" % (symbol))
except Exception as ex:
print ("download %s fails - %s" % (symbol, ex))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment