Last active
February 1, 2021 12:11
-
-
Save CNuge/a8e16b880e564692a19441756fc3c292 to your computer and use it in GitHub Desktop.
Multithreading modification of the S&P 500 data downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
from concurrent import futures | |
import pandas as pd | |
from pandas import DataFrame | |
import pandas_datareader.data as web | |
def download_stock(stock):
    """Download the price history for one ticker and save it as a CSV file.

    Queries the 'iex' source through ``web.DataReader`` for the window
    bounded by the module-level ``start_time`` and ``now_time``, adds a
    ``Name`` column holding the ticker, and writes the frame to
    ``<stock>_data.csv`` (a relative path).

    The download is best-effort: if the query or the write raises, the
    ticker is appended to the module-level ``bad_names`` list and a
    ``bad: <ticker>`` line is printed instead of propagating the error.

    Args:
        stock: Ticker symbol to query, e.g. ``'AAPL'``.

    Returns:
        None. Results are communicated through the CSV file and
        ``bad_names``.
    """
    try:
        print(stock)
        stock_df = web.DataReader(stock, 'iex', start_time, now_time)
        stock_df['Name'] = stock
        output_name = stock + '_data.csv'
        stock_df.to_csv(output_name)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed and mis-recorded
        # as a failed ticker.
        bad_names.append(stock)
        print('bad: %s' % (stock))
def years_before(moment, years):
    """Return midnight on the same calendar day ``years`` years before ``moment``.

    Args:
        moment: A ``datetime`` marking the end of the window.
        years: Number of whole years to step back.

    Returns:
        A ``datetime`` at 00:00 on the matching month/day of the earlier
        year. When ``moment`` falls on Feb 29 and the earlier year is not a
        leap year, Feb 28 of that year is returned instead.

    Raises:
        ValueError: If the earlier date is invalid for any reason other
            than the Feb 29 case (e.g. the year falls below 1).
    """
    target_year = moment.year - years
    try:
        return datetime(target_year, moment.month, moment.day)
    except ValueError:
        # Only the leap-day case is recoverable; anything else is a real error.
        if (moment.month, moment.day) != (2, 29):
            raise
        return datetime(target_year, 2, 28)


if __name__ == '__main__':
    # Set the download window: the five years ending now.
    now_time = datetime.now()
    start_time = years_before(now_time, 5)

    # List of S&P 500 companies.
    s_and_p = ['MMM','ABT','ABBV','ACN','ATVI','AYI','ADBE','AMD','AAP','AES','AET',
        'AMG','AFL','A','APD','AKAM','ALK','ALB','ARE','ALXN','ALGN','ALLE',
        'AGN','ADS','LNT','ALL','GOOGL','GOOG','MO','AMZN','AEE','AAL','AEP',
        'AXP','AIG','AMT','AWK','AMP','ABC','AME','AMGN','APH','APC','ADI','ANDV',
        'ANSS','ANTM','AON','AOS','APA','AIV','AAPL','AMAT','APTV','ADM','ARNC',
        'AJG','AIZ','T','ADSK','ADP','AZO','AVB','AVY','BHGE','BLL','BAC','BK',
        'BAX','BBT','BDX','BRK.B','BBY','BIIB','BLK','HRB','BA','BWA','BXP','BSX',
        'BHF','BMY','AVGO','BF.B','CHRW','CA','COG','CDNS','CPB','COF','CAH','CBOE',
        'KMX','CCL','CAT','CBG','CBS','CELG','CNC','CNP','CTL','CERN','CF','SCHW',
        'CHTR','CHK','CVX','CMG','CB','CHD','CI','XEC','CINF','CTAS','CSCO','C','CFG',
        'CTXS','CLX','CME','CMS','KO','CTSH','CL','CMCSA','CMA','CAG','CXO','COP',
        'ED','STZ','COO','GLW','COST','COTY','CCI','CSRA','CSX','CMI','CVS','DHI',
        'DHR','DRI','DVA','DE','DAL','XRAY','DVN','DLR','DFS','DISCA','DISCK','DISH',
        'DG','DLTR','D','DOV','DWDP','DPS','DTE','DRE','DUK','DXC','ETFC','EMN','ETN',
        'EBAY','ECL','EIX','EW','EA','EMR','ETR','EVHC','EOG','EQT','EFX','EQIX','EQR',
        'ESS','EL','ES','RE','EXC','EXPE','EXPD','ESRX','EXR','XOM','FFIV','FB','FAST',
        'FRT','FDX','FIS','FITB','FE','FISV','FLIR','FLS','FLR','FMC','FL','F','FTV',
        'FBHS','BEN','FCX','GPS','GRMN','IT','GD','GE','GGP','GIS','GM','GPC','GILD',
        'GPN','GS','GT','GWW','HAL','HBI','HOG','HRS','HIG','HAS','HCA','HCP','HP','HSIC',
        'HSY','HES','HPE','HLT','HOLX','HD','HON','HRL','HST','HPQ','HUM','HBAN','HII',
        'IDXX','INFO','ITW','ILMN','IR','INTC','ICE','IBM','INCY','IP','IPG','IFF','INTU',
        'ISRG','IVZ','IQV','IRM','JEC','JBHT','SJM','JNJ','JCI','JPM','JNPR','KSU','K','KEY',
        'KMB','KIM','KMI','KLAC','KSS','KHC','KR','LB','LLL','LH','LRCX','LEG','LEN','LUK',
        'LLY','LNC','LKQ','LMT','L','LOW','LYB','MTB','MAC','M','MRO','MPC','MAR','MMC','MLM',
        'MAS','MA','MAT','MKC','MCD','MCK','MDT','MRK','MET','MTD','MGM','KORS','MCHP','MU',
        'MSFT','MAA','MHK','TAP','MDLZ','MON','MNST','MCO','MS','MOS','MSI','MYL','NDAQ',
        'NOV','NAVI','NTAP','NFLX','NWL','NFX','NEM','NWSA','NWS','NEE','NLSN','NKE','NI',
        'NBL','JWN','NSC','NTRS','NOC','NCLH','NRG','NUE','NVDA','ORLY','OXY','OMC','OKE',
        'ORCL','PCAR','PKG','PH','PDCO','PAYX','PYPL','PNR','PBCT','PEP','PKI','PRGO','PFE',
        'PCG','PM','PSX','PNW','PXD','PNC','RL','PPG','PPL','PX','PCLN','PFG','PG','PGR',
        'PLD','PRU','PEG','PSA','PHM','PVH','QRVO','PWR','QCOM','DGX','RRC','RJF','RTN','O',
        'RHT','REG','REGN','RF','RSG','RMD','RHI','ROK','COL','ROP','ROST','RCL','CRM','SBAC',
        'SCG','SLB','SNI','STX','SEE','SRE','SHW','SIG','SPG','SWKS','SLG','SNA','SO','LUV',
        'SPGI','SWK','SBUX','STT','SRCL','SYK','STI','SYMC','SYF','SNPS','SYY','TROW','TPR',
        'TGT','TEL','FTI','TXN','TXT','TMO','TIF','TWX','TJX','TMK','TSS','TSCO','TDG','TRV',
        'TRIP','FOXA','FOX','TSN','UDR','ULTA','USB','UAA','UA','UNP','UAL','UNH','UPS','URI',
        'UTX','UHS','UNM','VFC','VLO','VAR','VTR','VRSN','VRSK','VZ','VRTX','VIAB','V','VNO',
        'VMC','WMT','WBA','DIS','WM','WAT','WEC','WFC','HCN','WDC','WU','WRK','WY','WHR','WMB',
        'WLTW','WYN','WYNN','XEL','XRX','XLNX','XL','XYL','YUM','ZBH','ZION','ZTS']

    bad_names = []  # filled by download_stock with tickers whose query failed

    # Use concurrent.futures' ThreadPoolExecutor to speed up the downloads
    # by doing them in parallel as opposed to sequentially.

    # Set the maximum thread number.
    max_workers = 50
    # Never start more threads than there are tickers; keep at least one
    # because ThreadPoolExecutor rejects max_workers <= 0.
    workers = max(1, min(max_workers, len(s_and_p)))

    with futures.ThreadPoolExecutor(workers) as executor:
        # download_stock reports through files and bad_names, so the mapped
        # results are not needed; leaving the ``with`` block waits for all
        # submitted downloads to finish.
        executor.map(download_stock, s_and_p)

    # Save failed queries to a text file so they can be retried.
    if bad_names:
        with open('failed_queries.txt', 'w') as outfile:
            outfile.writelines(name + '\n' for name in bad_names)

    # Timing:
    finish_time = datetime.now()
    duration = finish_time - now_time
    # total_seconds() includes whole days, which ``duration.seconds`` drops.
    minutes, seconds = divmod(int(duration.total_seconds()), 60)
    print('getSandP_threaded.py')
    print(f'The threaded script took {minutes} minutes and {seconds} seconds to run.')
    # The threaded script took 0 minutes and 31 seconds to run.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment