Skip to content

Instantly share code, notes, and snippets.

View 3catz's full-sized avatar

Peijin Chen 3catz

  • Brooklyn New York
View GitHub Profile
@3catz
3catz / tweet_dumper.py
Created February 12, 2018 21:35 — forked from yanofsky/LICENSE
A script to download all of a user's tweets into a CSV file
#!/usr/bin/env python
# encoding: utf-8
import tweepy #https://github.com/tweepy/tweepy
import csv
# Twitter API credentials.
# These are intentionally left blank in the published script; fill them in
# locally before running.
# NOTE(review): presumably passed to tweepy's OAuth handler further down the
# file -- that code is not visible in this excerpt; confirm.
consumer_key = ""
consumer_secret = ""
access_key = ""
@3catz
3catz / eeg_cnn_shootout.ipynb
Created April 1, 2019 11:30
EEG_CNN_shootout.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@3catz
3catz / adaptivenorm.py
Last active December 20, 2019 21:51
Adaptive Norming
# Fetch SPY price history, build a windowed matrix from the closing prices,
# and compute the standard deviation of each window.
# NOTE(review): `yf` is presumably the yfinance package and `reconstruct` a
# delay-embedding helper; neither is defined in this excerpt -- confirm.
trainstock = yf.Ticker("SPY")
# Date bounds passed to history() below.
start = "2009-01-01"
end = "2016-01-01"
st = trainstock.history(start = start,end = end)
# Keep only the price/volume columns, in this order.
st = st[["Close","Open","Volume","High","Low"]]
# Only the "Close" column feeds the reconstruction (dim = 45, tau = 1).
D = reconstruct(st["Close"].values, dim = 45, tau = 1)
# `win` is every column of D except the last; `s` is the last column.
win = D[:,:-1] ; s = D[:,-1]
# Standard deviation along the last axis of `win`.
std = np.std(win, axis = -1)
@3catz
3catz / crossval_bayesian_rf.py
Created September 28, 2020 17:44
Cross-validation and Bayesian hyperparameter tuning for Random Forest
from bayes_opt import BayesianOptimization
def RF_opt(n_estimators, max_depth):
global rskf
reg = RandomForestClassifier(verbose = 0,
n_estimators = int(n_estimators),
#min_samples_split = int(min_samples_split),
#min_samples_leaf = int(min_samples_leaf),
max_depth = int(max_depth),
@3catz
3catz / repeated_SMOTE.py
Created October 8, 2020 20:11
repeated_SMOTE
# Set-up for the repeated oversampling trials that follow.
# Wrap a TRIM_SMOTE sampler (proportion = 0.1) in MulticlassOversampling.
# NOTE(review): `sv` is presumably the smote_variants package; its import is
# not visible in this excerpt -- confirm.
oversampler = MulticlassOversampling(sv.TRIM_SMOTE(proportion = 0.1))
# Suppress every warning from here on.
warnings.filterwarnings("ignore")
# Accumulators for results; how they are filled is not visible in this
# excerpt.
Scores1 = []
cmatrices1 = []
cmatrices2 = []
Scores2 = []
for i in range(50):
print("Trial {}".format(i))
print("-----------------------------")
scores1 = []
def getWeights(d,lags):
    """Return the weights from the series expansion of the differencing operator.

    The weights follow the recurrence ``w[0] = 1`` and
    ``w[k] = -w[k-1] * (d - k + 1) / k`` for ``k = 1 .. lags - 1``.

    Args:
        d: Real differencing order.
        lags: Number of coefficients to produce. For ``lags <= 1`` only the
            leading weight ``1.0`` is returned.

    Returns:
        A float ``numpy`` column vector of shape ``(max(lags, 1), 1)``.
    """
    # Start from a float so the result dtype does not depend on `lags`
    # (a lone integer 1 would otherwise yield an int array when lags <= 1).
    w = [1.0]
    for k in range(1, lags):
        w.append(-w[-1] * (d - k + 1) / k)
    # Column vector so it can be used directly in matrix products.
    return np.array(w, dtype=float).reshape(-1, 1)
def plotWeights(dRange, lags, numberPlots):
@3catz
3catz / LGBM_BHPT_quantileregression.py
Created October 22, 2020 04:40
Code for optimizing LGBM model for quantile regression
covariates = trainx; target = trainy
def lgb_trainer(num_leaves, learning_rate,
max_depth, n_estimators,
reg_lambda,
#alpha,
reg_alpha,
subsample):
lgb = LGBMRegressor(objective = "quantile",
alpha = .95,
@3catz
3catz / vetting_ga.py
Created October 28, 2020 19:46
Vetting of GA feature subsets
# Score every candidate feature subset listed in `report` with repeated
# stratified cross-validation and collect the mean ROC AUC per subset.
# NOTE(review): `report`, `D` and `y` are defined outside this excerpt;
# column 1 of `report` presumably holds each subset's feature names -- confirm.
scores = []
for i in range(len(report)):
    myfeats = report.iloc[i, 1]
    print(myfeats)
    # Restrict the data to the current feature subset; `y` is used as-is.
    X = D[myfeats]
    # NOTE(review): tol = 1 is a very loose stopping tolerance -- confirm
    # it is intentional.
    clf = LogisticRegression(solver = "liblinear", C = 6, tol = 1)
    # Alternative estimator:
    #clf = RandomForestClassifier()
    # 10 folds repeated 100 times per subset.
    rskf = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 100)
    score = np.mean(cross_val_score(clf, X, y, cv = rskf, scoring = "roc_auc"))
    scores.append(score)
@3catz
3catz / select_from_model.py
Last active October 28, 2020 20:01
select_from_model_repeated
from sklearn.feature_selection import *
feat_list = []
all_scores = []
for i in range(10):
np.random.seed(i)
sfm = SelectFromModel(estimator = clf, threshold=None, prefit=False,
norm_order=1, max_features = 12)
sfm.fit(D[allfeats], y)
modfeats = sfm.get_support()
@3catz
3catz / pycaret.py
Last active November 5, 2020 21:01
Using the PyCaret library for AutoML
from pycaret.datasets import get_data
from pycaret.classification import *
# Tidy the results table and pick the feature subset in its top row.
# NOTE(review): `report` is presumably a pandas DataFrame with "Scores" and
# "Chosen Feats" columns built earlier in the file -- confirm.
# Round scores to 3 decimal places.
report["Scores"] = np.round(report["Scores"], 3)
# Sort in place by score, highest first.
report.sort_values(by = "Scores", ascending = False, inplace = True)
#report.index
# First row after the descending sort, i.e. the highest-scoring entry.
ga_feats = report.iloc[0]["Chosen Feats"]
ename = setup(data = D[used_feats], target = "DEATH_EVENT",
test_data = None,
fold_strategy = "stratifiedkfold",
fold_shuffle = True,