Skip to content

Instantly share code, notes, and snippets.

@benyblack
Last active April 1, 2021 09:11
Show Gist options
  • Save benyblack/090fec64d7d3c1be059959e2bf00d12a to your computer and use it in GitHub Desktop.
Save benyblack/090fec64d7d3c1be059959e2bf00d12a to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
import time
from datetime import datetime
from datetime import timedelta
import sys
from fhub import Session
import os
import finnhub
# Initialise the personal token.
# A FINNHUB_TOKEN environment variable, when set, takes precedence so the real
# key does not have to be written into this file; otherwise the placeholder
# below is used exactly as before.
token = os.environ.get('FINNHUB_TOKEN', 'YOUR FINNHUB TOKEN')
hub = Session(token)

# Folders for the per-symbol CSV files and for the daily error logs.
data_root = './data/data/'
logs_root = './data/logs/'
finnhub_client = finnhub.Client(api_key=token)

# makedirs also creates the missing parent ('./data'); a plain mkdir raises
# FileNotFoundError on a fresh checkout where that parent does not exist yet.
os.makedirs(data_root, exist_ok=True)
os.makedirs(logs_root, exist_ok=True)
def last_saved_date(symbol):
    """Return the calendar date found in the last row of the symbol's CSV file.

    The value of the ``t`` column in the final row is parsed with
    ``pd.to_datetime`` and reduced to a ``date``.

    Raises whatever ``pd.read_csv`` raises when the file is missing, and
    ``IndexError`` when the file contains no data rows.
    """
    # Only the 't' column is needed, so skip parsing the other columns.
    # The path comes from symbol_file_path so that the reader and the writers
    # always agree on where a symbol is stored.
    df = pd.read_csv(symbol_file_path(symbol), usecols=['t'])
    last_date_string = df['t'].iloc[-1]
    return pd.to_datetime(last_date_string).date()
def save_daily_data(symbol):
    """Fetch the complete candle history of ``symbol`` and write it to a new CSV.

    The request spans ``history_begins`` .. ``today_timestamp`` with the 'D'
    resolution. Nothing is written when the client returns ``None`` or when
    the response status field ``s`` equals ``'no_data'``.
    """
    candles = finnhub_client.stock_candles(
        symbol, 'D', history_begins, today_timestamp)
    if candles is None:
        return

    status = candles['s']
    if status != 'no_data':
        # Convert first, then overwrite/create the per-symbol file.
        frame = to_dataframe(candles)
        frame.to_csv(symbol_file_path(symbol))
        print(symbol + ' is saved')
        return

    print('no_data')
def save_daily_data_from(symbol, from_date):
    """Append the candles dated on or after ``from_date`` to the symbol's CSV.

    Parameters:
        symbol: ticker whose CSV file (see ``symbol_file_path``) is extended.
        from_date: first day that is still missing from the file; it must
            provide ``strftime`` and ``toordinal`` (e.g. ``datetime.date``).

    Nothing is written when the client returns ``None``, when the response
    status ``s`` is ``'no_data'``, or when no row survives the date filter.

    Raises:
        Any exception raised while converting the response to a DataFrame is
        re-raised after the raw response has been printed.
    """
    start = from_date.strftime('%Y-%m-%d')
    res = finnhub_client.stock_candles(symbol, 'D', _unixtime(start), today_timestamp)
    print('====' + symbol + '=====')
    if res is None:
        return
    if res['s'] == 'no_data':
        print('no_data')
        return
    try:
        df = to_dataframe(res)
    except Exception:
        # Show the payload that could not be converted, then propagate the
        # error. The previous `sys.exit(0)` reported *success* on failure.
        print(res)
        raise
    # Sometimes finnhub does not respect the start_date correctly
    from_datetime = datetime.fromordinal(from_date.toordinal())
    # Inclusive bound: a candle stamped exactly at midnight of from_date
    # belongs to from_date (the first wanted day) and must not be dropped.
    df = df[df.t >= from_datetime]
    if df.empty:
        print('up to date')
        return
    df.to_csv(symbol_file_path(symbol), mode='a', header=False)
    print(str(df.shape[0]) + ' record(s) appended')
def get_now():
    """Return the current local date and time as ``YYYY-MM-DD-HH-MM-SS``."""
    moment = datetime.now()
    return f"{moment:%Y-%m-%d-%H-%M-%S}"
def get_today():
    """Return the current local date as ``YYYY-MM-DD``."""
    current_day = datetime.now().date()
    return current_day.isoformat()
def log_error(symbol, error_message):
    """Print ``symbol, error_message`` and append the same line to today's log.

    The log file is ``<logs_root><YYYY-MM-DD>-error.log`` and is opened in
    append mode, so repeated calls on the same day accumulate in one file.
    """
    entry = ', '.join((symbol, error_message))
    print(entry)
    log_path = f"{logs_root}{get_today()}-error.log"
    with open(log_path, "a") as log_file:
        log_file.write(entry + '\n')
def symbol_file_path(symbol):
    """Return the path of the CSV file that stores ``symbol`` under ``data_root``."""
    return ''.join((data_root, symbol, '.csv'))
# from https://github.com/paduel/fhub/blob/194b8f6e253d3f59a340fd60db5171f40ec7b845/fhub/utils.py#L167
def _unixtime(date):
if isinstance(date, str):
_date = int((datetime.strptime(_normalize_date(date),
"%Y-%m-%d") - datetime(1970, 1, 1, 0, 0)).total_seconds())
elif isinstance(date, datetime):
_date = int((date - datetime(1970, 1, 1, 0, 0)).total_seconds())
elif isinstance(date, (int, float)):
_date = int(date)
else:
raise AttributeError(
"A date-like string, timestamp or datetime must be passed")
return _date
def _normalize_date(date):
assert isinstance(date, str)
return date.replace('/', '-').replace('.', '-').replace(' ', '-')
def to_dataframe(data):
    """Build a DataFrame from ``data`` and turn its ``t`` column into datetimes.

    Each value of ``t`` is passed through ``datetime.fromtimestamp``, so the
    resulting datetimes are naive and expressed in the machine's local time.

    Parameters:
        data: anything ``pd.DataFrame`` accepts that yields a ``t`` column of
            epoch seconds.

    Returns:
        The DataFrame with ``t`` replaced by the converted values.
    """
    df = pd.DataFrame(data)
    # Convert the single column directly; a row-wise apply(axis=1) builds a
    # Series object for every row only to read one field from it.
    df['t'] = pd.to_datetime(df['t'].map(datetime.fromtimestamp))
    return df
# init some vars
today = datetime.today().date()
today_timestamp = _unixtime(str(today))
history_begins = _unixtime('1900-01-01')

# The date of the latest SPY quote is the reference a saved file is compared
# against to decide whether it still needs new rows.
last_spy = finnhub_client.quote('SPY')
last_spy_date = datetime.fromtimestamp(last_spy['t']).date()

# Set the command
command = 'https://finnhub.io/api/v1/stock/symbol?exchange=US&token=' + token
# make the request to finnhub REST api to get the response object
# A timeout keeps the script from hanging forever on a stalled connection and
# raise_for_status stops early on an HTTP error instead of parsing its body.
rz = requests.get(command, timeout=30)
rz.raise_for_status()

# NOTE: cols / df_company_profile are not used in the visible part of this
# script; they are kept because other code may rely on these names.
cols = ['country', 'currency', 'exchange', 'finnhubIndustry', 'ipo', 'logo',
        'marketCapitalization', 'name', 'phone', 'shareOutstanding', 'ticker',
        'weburl']
df_company_profile = pd.DataFrame(columns=cols)

# Keep common stocks only and drop the entries whose mic is "OOTC".
df_companies = pd.read_json(rz.content)
df_companies = df_companies[(df_companies.type == 'Common Stock') & (
    df_companies.mic != "OOTC")]
df_companies.to_csv('./companies.csv')

rate = 0  # API calls made in the current 60-second window
tic = time.perf_counter()  # start of the current window
for symbol in df_companies["symbol"]:
    try:
        toc = time.perf_counter()
        print('======== ' + symbol + ' =======' + str(toc - tic) + '===' + str(rate))
        # respect rate_limit for finnhub which is 60
        # sometimes it raises rate limit error on 58-59
        # better solution might be to monitor actual rate limit error instead
        time_diff = toc - tic
        if time_diff < 60 and rate > 57:
            # Report the time that is actually slept (the remainder of the
            # window), not the time already elapsed.
            wait_seconds = 60 - time_diff
            print('reached the rate limit, wait for ' +
                  str(wait_seconds) + ' second(s)')
            time.sleep(wait_seconds)
            tic = time.perf_counter()
            rate = 0
        elif time_diff > 60:
            # The window expired on its own: start a new one.
            tic = time.perf_counter()
            rate = 0

        file_path = symbol_file_path(symbol)
        if not os.path.isfile(file_path):
            # get full history and save it to a new file
            save_daily_data(symbol)
            rate += 1
        else:
            last = last_saved_date(symbol)
            if last < last_spy_date:
                # get remaining records since last fetch
                save_daily_data_from(symbol, last + timedelta(days=1))
                rate += 1
            else:
                print(symbol + ' skipped ')
    except (ConnectionError, requests.exceptions.ConnectionError):
        # requests' ConnectionError does not derive from the builtin one, so
        # both have to be named for this branch to catch network failures.
        log_error(symbol, 'connection error')
    except Exception as err:
        # KeyboardInterrupt and SystemExit are deliberately not caught here,
        # so the run can still be stopped. The cause is logged with the entry.
        log_error(symbol, 'unexpected error: ' + repr(err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment