Skip to content

Instantly share code, notes, and snippets.

@homelyseven250
Created February 12, 2024 14:26
Show Gist options
  • Save homelyseven250/c88efd4c367579bb5f1fabd46322ebe5 to your computer and use it in GitHub Desktop.
Save homelyseven250/c88efd4c367579bb5f1fabd46322ebe5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configs"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"offset = 0\n",
"limit = 3000\n",
"period = 'max' # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download all NASDAQ traded symbols"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total number of symbols traded = 8867\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.read_csv(\"http://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt\", sep='|')\n",
"data_clean = data[data['Test Issue'] == 'N']\n",
"symbols = data_clean['NASDAQ Symbol'].tolist()\n",
"print('total number of symbols traded = {}'.format(len(symbols)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Historic data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"! pip install yfinance > /dev/null 2>&1\n",
"! mkdir hist"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import yfinance as yf\n",
"import os, contextlib"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total number of valid symbols downloaded = 2733\n",
"CPU times: user 9min 34s, sys: 10.8 s, total: 9min 44s\n",
"Wall time: 20min 3s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"limit = limit if limit else len(symbols)\n",
"end = min(offset + limit, len(symbols))\n",
"is_valid = [False] * len(symbols)\n",
"# force silencing of verbose API\n",
"with open(os.devnull, 'w') as devnull:\n",
" with contextlib.redirect_stdout(devnull):\n",
" for i in range(offset, end):\n",
" s = symbols[i]\n",
" data = yf.download(s, period=period)\n",
" if len(data.index) == 0:\n",
" continue\n",
" \n",
" is_valid[i] = True\n",
" data.to_csv('hist/{}.csv'.format(s))\n",
"\n",
"print('Total number of valid symbols downloaded = {}'.format(sum(is_valid)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"valid_data = data_clean[is_valid]\n",
"valid_data.to_csv('symbols_valid_meta.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Separating ETFs and Stocks"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"!mkdir stocks\n",
"!mkdir etfs"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"etfs = valid_data[valid_data['ETF'] == 'Y']['NASDAQ Symbol'].tolist()\n",
"stocks = valid_data[valid_data['ETF'] == 'N']['NASDAQ Symbol'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import shutil\n",
"from os.path import isfile, join\n",
"\n",
"def move_symbols(symbols, dest):\n",
" for s in symbols:\n",
" filename = '{}.csv'.format(s)\n",
" shutil.move(join('hist', filename), join(dest, filename))\n",
" \n",
"move_symbols(etfs, \"etfs\")\n",
"move_symbols(stocks, \"stocks\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"! rmdir hist"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment