Skip to content

Instantly share code, notes, and snippets.

import datetime
import requests
import pandas as pd
def daily_price_historical(symbol, comparison_symbol, limit=0, aggregate=1, exchange='CCCAGG', print_url=False):
"""Returns a pandas.Dataframe containing OHLC daily data for the specified
symbol
Parameters
----------
@xoelop
xoelop / replace_infs.py
Last active September 27, 2018 09:55
Code to replace the inf and -inf values in a np.array with the max and min of the non-inf values
import numpy as np
# Replace each infinity with an extreme of the remaining values: +inf takes
# the largest entry that is not +inf, -inf takes the smallest entry that is
# not -inf. Both replacements write into `a` in place.
a = np.array([1, np.inf, 10, -5, -np.inf])
print(a)
# [ 1. inf 10. -5. -inf]
np.putmask(a, np.isposinf(a), np.max(a[~np.isposinf(a)]))
np.putmask(a, np.isneginf(a), np.min(a[~np.isneginf(a)]))
viridis_colorscale = [
[0.0, "#440154"],
[0.0627450980392, "#48186a"],
[0.125490196078, "#472d7b"],
[0.188235294118, "#424086"],
[0.250980392157, "#3b528b"],
[0.313725490196, "#33638d"],
[0.376470588235, "#2c728e"],
[0.439215686275, "#26828e"],
[0.501960784314, "#21918c"],
Figure({
'data': [{'colorscale': 'Viridis',
'hoverinfo': 'text+name',
'name': 'Training set',
'showscale': False,
'text': array(['max_features: 0.05<br>n_estimators: 10<br>min_samples_split: 13<br>Test score: 0.8402<br>Train score: 0.9668<br>Fit time: 52.99s',
'max_features: 0.07<br>n_estimators: 10<br>min_samples_split: 13<br>Test score: 0.8845<br>Train score: 0.9715<br>Fit time: 74.24s',
'max_features: 0.09<br>n_estimators: 10<br>min_samples_split: 13<br>Test score: 0.8954<br>Train score: 0.9731<br>Fit time: 92.01s',
'max_features: 0.11<br>n_estimators: 10<br>min_samples_split: 13<br>Test score: 0.8851<br>Train score: 0.9758<br>Fit time: 116.09s',
'max_features: 0.13<br>n_estimators: 10<br>min_samples_split: 13<br>Test score: 0.8973<br>Train score: 0.9761<br>Fit time: 135.92s',
@xoelop
xoelop / gridsearch_random_forest.py
Last active November 3, 2018 18:39
grid parameters gridsearch
# Exhaustive grid search over three random-forest hyperparameters.
# NOTE(review): RandomForestRegressor, GridSearchCV, cv, X1 and y1 are not
# defined in this snippet — presumably scikit-learn imports plus a CV splitter
# and training data created elsewhere; confirm before running.

# Base estimator: fixed seed (random_state=42), n_jobs=-1, verbose=2.
model = RandomForestRegressor(n_jobs=-1, random_state=42, verbose=2)
# Candidate values per hyperparameter: 11 each, so the full grid holds
# 11 * 11 * 11 = 1331 parameter combinations.
grid = {'n_estimators': [10, 13, 18, 25, 33, 45, 60, 81, 110, 148, 200],
'max_features': [0.05, 0.07, 0.09, 0.11, 0.13, 0.15, 0.17, 0.19, 0.21, 0.23, 0.25],
'min_samples_split': [2, 3, 5, 8, 13, 20, 32, 50, 80, 126, 200]}
# The search itself is given n_jobs=4 while the estimator above has n_jobs=-1.
# return_train_score=True asks for train scores in addition to test scores
# (see the scikit-learn GridSearchCV documentation).
rf_gridsearch = GridSearchCV(estimator=model, param_grid=grid, n_jobs=4,
cv=cv, verbose=2, return_train_score=True)
# Fit every combination in the grid on (X1, y1).
rf_gridsearch.fit(X1, y1)
# Best scores per (min_samples_split, max_features) pair, shown as a heatmap.
# NOTE(review): df_gridsearch and sns are not defined in this snippet —
# presumably a DataFrame of grid-search results and seaborn; confirm.

# Group by the two parameters and take the column-wise maximum over the
# remaining rows of each group.
max_scores = df_gridsearch.groupby(['param_min_samples_split',
'param_max_features']).max()
# Pivot the innermost index level (param_max_features) into columns and keep
# only the mean test/train score blocks.
max_scores = max_scores.unstack()[['mean_test_score', 'mean_train_score']]
# Annotated heatmap of the mean test score block; the trailing semicolon
# presumably suppresses the notebook echo of the return value.
sns.heatmap(max_scores.mean_test_score, annot=True, fmt='.4g');
# Derived 'size' column: each mean test score divided by the best mean test
# score, raised to the 100th power, then multiplied by 20 and offset by 1.
# The row holding the maximum score therefore gets 1 ** 100 * 20 + 1 = 21.
# NOTE(review): presumably used as a marker size in a plot, with the large
# exponent exaggerating small score differences — confirm against the caller.
df_gridsearch['size'] = (df_gridsearch.mean_test_score /
df_gridsearch.mean_test_score.max()) ** 100 * 20 + 1
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
def parallel_process(array,
function,
type_pool: str = 'multithreading',
use_kwargs=False,
n_jobs=16,
front_num=3,
import pandas as pd
import numpy as np
def ratio(ret1: pd.Series,
ret2: pd.Series = 0,
ratio: str = 'sharpe',
log: bool = True) -> float:
"""log: if True, convert ret1 and ret2 to log returns"""
if log:
ret1 = np.log1p(ret1)