Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dast1/0fd4e3aa3888f6b0de020dc433f7b746 to your computer and use it in GitHub Desktop.
Save dast1/0fd4e3aa3888f6b0de020dc433f7b746 to your computer and use it in GitHub Desktop.
This is a multiprocessing-enabled batch downloader for Yahoo stock prices.
# Choose Ticker Universe (example: Run russell3000.py)
# The universe script is executed inside this module's namespace; the code
# further down presumably relies on it to define the `Russell3000` ticker list.
# NOTE(review): exec() runs whatever code the file contains - only point this
# at a script you trust.
with open("russell3000(v.1.0).py") as _universe_file:
    # The context manager closes the file handle once the source is read.
    exec(_universe_file.read())
# Import Libraries
import datetime as dt
import math
import multiprocessing as multi
import os

import numpy as np
import pandas_datareader as web
# Build Yahoo fetch data function
def fetch_data(symbol):
    """Download the price history for one ticker and save it as a CSV file.

    The data is requested from the 'yahoo' source of pandas_datareader for the
    module-level ``start``..``end`` date range and written to
    ``EOD_data/<symbol>.csv``.

    Args:
        symbol: Ticker symbol to download; also used as the CSV file name.

    Raises:
        Whatever ``web.DataReader`` or ``DataFrame.to_csv`` raise on failure;
        nothing is caught here.
    """
    df = web.DataReader(symbol, 'yahoo', start, end)
    f_path = 'EOD_data/'
    # Create the output folder on first use. exist_ok=True keeps this safe
    # when several worker processes reach this line at the same time.
    os.makedirs(f_path, exist_ok=True)
    df.to_csv(os.path.join(f_path, symbol + '.csv'))
# Build looping function
def loop(subset, max_tickers=6):
    """Fetch every ticker in ``subset`` and print a success/failure line each.

    Args:
        subset: Sequence (sliceable, e.g. a list) of ticker symbols.
        max_tickers: Maximum number of tickers to process from the front of
            ``subset``. The default of 6 keeps the original cut-off, which
            stopped after the ticker at position 5. Pass ``None`` to process
            the whole subset.

    A failing download does not stop the loop: the exception is caught, a
    "failed!" line is printed and the next ticker is tried.
    """
    # enumerate() gives the true position of each ticker; list.index() is O(n)
    # per call and reports the first occurrence for duplicated symbols.
    for position, ticker in enumerate(subset[:max_tickers]):
        try:
            fetch_data(ticker)
        except Exception:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # can still stop the worker.
            print(ticker + '(%d): failed!' % position)
        else:
            print(ticker + '(%d): success!' % position)
# Set default start date and end date
start = dt.datetime(1980, 1, 1)
end = dt.datetime.today()
# Split work among CPU workers
# Use one worker fewer than the CPU count, but never fewer than one so the
# boundary computation below cannot divide by zero on a single-core machine.
num_workers = max(1, multi.cpu_count() - 1)
# Slice i covers Russell3000[par_start_idx[i]:par_end_idx[i]]. The integer
# boundaries i * total // num_workers always produce exactly num_workers
# contiguous slices that together cover every ticker; some slices are empty
# when there are fewer tickers than workers.
total_tickers = len(Russell3000)
par_start_idx = [(i * total_tickers) // num_workers for i in range(num_workers)]
par_end_idx = par_start_idx[1:]
par_end_idx.append(total_tickers)
# Bring it all together
if __name__ == '__main__':
    # Start one process per (start, end) boundary pair. Iterating the pairs
    # themselves, rather than range(num_workers), means no slice is skipped
    # and no index runs past the end if the two counts ever differ.
    workers = []
    for lo, hi in zip(par_start_idx, par_end_idx):
        chunk = Russell3000[lo:hi]
        if not chunk:
            # Nothing to download for this slice; do not spawn a process.
            continue
        p = multi.Process(target=loop, args=(chunk,))
        p.start()
        workers.append(p)
    # Wait for every worker so the script only finishes once all downloads
    # have been attempted.
    for p in workers:
        p.join()
    # Same namespace clean-up as before, limited to names that always exist.
    del start, end, par_end_idx, par_start_idx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment