Last active
April 1, 2021 09:11
-
-
Save benyblack/090fec64d7d3c1be059959e2bf00d12a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import os
import sys
import time
from datetime import datetime
from datetime import timedelta

import finnhub
import pandas as pd
import requests
from fhub import Session
# Initialise the personal token
token = 'YOUR FINNHUB TOKEN'
# fhub session built from the same token. The code visible in this file only
# calls `finnhub_client`; `hub` is kept so the module-level name still exists.
hub = Session(token)
# Output locations: one CSV per symbol under data_root, dated error logs
# under logs_root.
data_root = './data/data/'
logs_root = './data/logs/'
finnhub_client = finnhub.Client(api_key=token)
# makedirs (unlike mkdir) also creates the missing './data' parent directory,
# and exist_ok=True makes a rerun with existing directories a no-op.
os.makedirs(data_root, exist_ok=True)
os.makedirs(logs_root, exist_ok=True)
def last_saved_date(symbol):
    """Return the date held in the ``t`` column of the last row of a symbol's CSV.

    The file is read from ``data_root + symbol + '.csv'``. The last row is
    taken positionally, so this is the newest date only if the file is in
    chronological order (NOTE(review): presumably true because rows are only
    ever appended - confirm).
    """
    saved = pd.read_csv(data_root + symbol + '.csv')
    final_row = saved.iloc[-1]
    parsed = pd.to_datetime(final_row.t)
    return parsed.date()
def save_daily_data(symbol):
    """Download daily candles for *symbol* and write them to the symbol's CSV.

    The request spans ``history_begins`` to ``today_timestamp``. Nothing is
    written when the client returns ``None`` or a response whose ``'s'``
    field is ``'no_data'``.
    """
    candles = finnhub_client.stock_candles(
        symbol, 'D', history_begins, today_timestamp)
    if candles is None:
        return
    if candles['s'] == 'no_data':
        print('no_data')
        return
    frame = to_dataframe(candles)
    target = symbol_file_path(symbol)
    frame.to_csv(target)
    print(f'{symbol} is saved')
def save_daily_data_from(symbol, from_date):
    """Fetch daily candles for *symbol* from *from_date* on and append them to its CSV.

    Args:
        symbol: Ticker symbol; also selects the target file through
            ``symbol_file_path``.
        from_date: First day to request. Must provide ``strftime`` and
            ``toordinal`` (the caller in this file passes a ``datetime.date``).

    Returns:
        None. Returns early without writing when the client returns ``None``,
        the response status is ``'no_data'``, or no row is newer than
        *from_date*.

    Raises:
        Exception: Whatever ``to_dataframe`` raises for an unusable response.
            The raw response is printed first and the original exception is
            re-raised, instead of terminating the interpreter with a success
            exit code.
    """
    start = from_date.strftime('%Y-%m-%d')
    res = finnhub_client.stock_candles(symbol, 'D', _unixtime(start), today_timestamp)
    print('====' + symbol + '=====')
    if res is None:
        return
    if res['s'] == 'no_data':
        print('no_data')
        return
    try:
        df = to_dataframe(res)
    except Exception:
        # Show the payload that could not be converted, then let the real
        # error propagate so the caller can log it and move on.
        print(res)
        raise
    # Sometimes finnhub does not respect the start_date correctly
    from_datetime = datetime.fromordinal(from_date.toordinal())
    df = df[df.t > from_datetime]
    if df.shape[0] == 0:
        print('up to date')
        return
    # Append without a header row: the file already has one from its creation.
    df.to_csv(symbol_file_path(symbol), mode='a', header=False)
    print(str(df.shape[0]) + ' record(s) appended')
def get_now():
    """Return the current local time formatted as ``YYYY-MM-DD-HH-MM-SS``."""
    moment = datetime.now()
    return f"{moment:%Y-%m-%d-%H-%M-%S}"
def get_today():
    """Return the current local date formatted as ``YYYY-MM-DD``."""
    moment = datetime.now()
    return f"{moment:%Y-%m-%d}"
def log_error(symbol, error_message):
    """Print ``symbol, error_message`` and append the same line to today's error log.

    The log file is ``logs_root + get_today() + '-error.log'`` and is opened
    in append mode.
    """
    entry = symbol + ', ' + error_message
    print(entry)
    log_path = logs_root + get_today() + '-error.log'
    with open(log_path, "a") as log_file:
        log_file.write(entry + '\n')
def symbol_file_path(symbol):
    """Return the CSV path for *symbol*: ``data_root`` + symbol + ``.csv``."""
    pieces = (data_root, symbol, '.csv')
    return ''.join(pieces)
# from https://github.com/paduel/fhub/blob/194b8f6e253d3f59a340fd60db5171f40ec7b845/fhub/utils.py#L167 | |
def _unixtime(date): | |
if isinstance(date, str): | |
_date = int((datetime.strptime(_normalize_date(date), | |
"%Y-%m-%d") - datetime(1970, 1, 1, 0, 0)).total_seconds()) | |
elif isinstance(date, datetime): | |
_date = int((date - datetime(1970, 1, 1, 0, 0)).total_seconds()) | |
elif isinstance(date, (int, float)): | |
_date = int(date) | |
else: | |
raise AttributeError( | |
"A date-like string, timestamp or datetime must be passed") | |
return _date | |
def _normalize_date(date): | |
assert isinstance(date, str) | |
return date.replace('/', '-').replace('.', '-').replace(' ', '-') | |
def to_dataframe(data):
    """Build a DataFrame from *data* and convert its ``t`` column to datetimes.

    Args:
        data: Anything ``pd.DataFrame`` accepts that yields a ``t`` column of
            Unix timestamps in seconds (presumably a Finnhub candle response -
            verify against the caller).

    Returns:
        The DataFrame, with ``t`` replaced by the result of
        ``datetime.fromtimestamp`` for each value, i.e. naive local time.

    Raises:
        KeyError: if *data* produces no ``t`` column.
    """
    df = pd.DataFrame(data)
    # Convert the one column directly instead of building a Series for every
    # row with apply(axis=1); this also works when the frame has no rows.
    df['t'] = [datetime.fromtimestamp(stamp) for stamp in df['t']]
    return df
# Module-level reference values read by the download helpers and the main loop.
today = datetime.now().date()
# Today's date as epoch seconds; used as the upper bound of candle requests.
today_timestamp = _unixtime(today.isoformat())
# Lower bound used when a symbol's full history is requested.
history_begins = _unixtime('1900-01-01')
# Date of the latest SPY quote; the main loop compares each file's last saved
# date against it to decide whether a symbol needs updating.
last_spy = finnhub_client.quote('SPY')
last_spy_date = datetime.fromtimestamp(last_spy['t']).date()
# Set the command
command = 'https://finnhub.io/api/v1/stock/symbol?exchange=US&token=' + token
# Make the request to the finnhub REST api to get the response object.
# The timeout keeps a stalled connection from hanging the script forever.
rz = requests.get(command, timeout=30)
# Stop here on an HTTP error status instead of trying to parse an error body
# as the symbol list.
rz.raise_for_status()
cols = ['country', 'currency', 'exchange', 'finnhubIndustry', 'ipo', 'logo',
        'marketCapitalization', 'name', 'phone', 'shareOutstanding', 'ticker',
        'weburl']
df_company_profile = pd.DataFrame(columns=cols)
# read_json is handed a file-like object rather than the raw response bytes.
df_companies = pd.read_json(io.StringIO(rz.text))
# Keep common stock only, and drop symbols whose mic is "OOTC".
df_companies = df_companies[(df_companies.type == 'Common Stock') & (
    df_companies.mic != "OOTC")]
df_companies.to_csv('./companies.csv')
# Download or update the daily data of every listed symbol.
# `rate` counts the API requests issued since `tic`, the start of the current
# rate-limit window.
rate = 0
tic = time.perf_counter()
for symbol in df_companies["symbol"]:
    try:
        toc = time.perf_counter()
        time_diff = toc - tic
        print('======== ' + symbol + ' =======' + str(time_diff) + '===' + str(rate))
        # respect rate_limit for finnhub which is 60
        # sometimes it raises rate limit error on 58-59
        # better solution might be to monitor actual rate limit error instead
        if time_diff < 60 and rate > 57:
            wait_seconds = 60 - time_diff
            # Report the time actually slept, not the time already elapsed.
            print('reached the rate limit, wait for ' +
                  str(wait_seconds) + ' second(s)')
            time.sleep(wait_seconds)
            tic = time.perf_counter()
            rate = 0
        elif time_diff >= 60:
            # The window has expired on its own; start a new one.
            tic = time.perf_counter()
            rate = 0
        file_path = symbol_file_path(symbol)
        if not os.path.isfile(file_path):
            # get full history and save it to a new file
            # Count the request before issuing it so that a failed call still
            # counts against the quota.
            rate += 1
            save_daily_data(symbol)
        else:
            last = last_saved_date(symbol)
            if last < last_spy_date:
                # get remaining records since last fetch
                rate += 1
                save_daily_data_from(symbol, last + timedelta(days=1))
            else:
                print(symbol + ' skipped ')
    except (ConnectionError, requests.exceptions.ConnectionError):
        # requests' ConnectionError does not derive from the builtin one, so
        # both are named explicitly.
        log_error(symbol, 'connection error')
    except Exception as err:
        # Exception (not a bare except) lets Ctrl-C and SystemExit stop the
        # run; the error detail is kept in the log for diagnosis.
        log_error(symbol, 'unexpected error: ' + repr(err))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment