Skip to content

Instantly share code, notes, and snippets.

View 3catz's full-sized avatar

Peijin Chen 3catz

  • Brooklyn New York
View GitHub Profile
@3catz
3catz / fuzzytargs.py
Created December 20, 2019 21:55
Fuzzy Targets
D = reconstruct(s, dim = 22 * 2 + 5, tau = 1)
Xs = []
Ys = []
for choice in np.repeat("random",3):
X = D[:,:22 * 2] ; Y = D[:,-5:]
if choice == 'random':
import random
y = []
for i in range(len(Y)):
@3catz
3catz / ixis_data_r.ipynb
Created February 2, 2022 00:04
IXIS_data_R.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@3catz
3catz / catboost_hp_cv.py
Last active May 29, 2021 09:22
catboost_HPopt_CV
from catboost import Pool, cv, CatBoostClassifier
from bayes_opt import BayesianOptimization
from sklearn.model_selection import *
from sklearn.metrics import *
def CB_opt(n_estimators, depth, learning_rate, max_bin,
subsample, num_leaves, l2_leaf_reg, model_size_reg):
scores = []
skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 1944)
@3catz
3catz / secretary_simulation.py
Created February 25, 2021 23:42
Secretary Problem simulation
# Sizing constants for the secretary-problem simulation.
# Number of inner-loop repetitions run for each value of T in the sweep below.
n_trials = 10000
# presumably the size of the candidate pool in each trial; confirm against the
# truncated loop body
n_candidates = 100
# Exclusive upper bound of the T sweep: 66% of n_candidates, truncated to int.
max_burn = int(0.66 * n_candidates)
for T in range(1, max_burn, 5):
global_maxes = []
diff_from_max = []
time_taken = []
final_choices = []
for i in range(n_trials):
@3catz
3catz / seoul_pycaret_setup.py
Created December 19, 2020 21:05
Seoul Bike Share pycaret setup
exper = setup(
data = bike_yes,
categorical_features = ["Seasons", "Holiday"],
silent = True,
ordinal_features = {"Hour": sorted_hours},
ignore_features = ["Functioning Day","Date"],
target = 'Rented Bike Count',
use_gpu = True,
data_split_shuffle = False,
fold_strategy = "timeseries",
acts = ["jogging","standing","downstairs","upstairs","walkfast","walkmod","walkslow","lying","sitting"]
def create_windows(subjects):
length = 100
stride = 50
sample = 10
framelist = []
targetlist = []
global acts
for act in acts:
for f in glob.glob(subjects + act):
@3catz
3catz / ga_logreg.py
Last active November 5, 2020 22:25
GA algorithms and Logistic Regression
from sklearn.metrics import *

# Matthews correlation coefficient wrapped as a scorer object.
mcc = make_scorer(matthews_corrcoef)

# Logistic-regression estimator with the liblinear solver and an intercept.
estimator = LogisticRegression(
    solver="liblinear",
    C=6,
    tol=1,
    fit_intercept=True,
)

from sklearn.model_selection import *

# Empty frame and lists; presumably filled by the search code that follows
# (not visible here).
report = pd.DataFrame()
nofeats, chosen_feats, cvscore = [], [], []

# Stratified 10-fold cross-validation, repeated twice.
rkf = RepeatedStratifiedKFold(n_splits=10, n_repeats=2)
@3catz
3catz / pycaret.py
Last active November 5, 2020 21:01
Using Pycaret library for autoML
from pycaret.datasets import get_data
from pycaret.classification import *
# Round the scores to three decimals, then rank the rows best-first in place.
report["Scores"] = np.round(report["Scores"], decimals=3)
report.sort_values("Scores", ascending=False, inplace=True)
# After the descending sort, the first row holds the highest score; keep the
# feature subset stored alongside it.
ga_feats = report["Chosen Feats"].iloc[0]
ename = setup(data = D[used_feats], target = "DEATH_EVENT",
test_data = None,
fold_strategy = "stratifiedkfold",
fold_shuffle = True,
@3catz
3catz / select_from_model.py
Last active October 28, 2020 20:01
select_from_model_repeated
from sklearn.feature_selection import *
feat_list = []
all_scores = []
for i in range(10):
np.random.seed(i)
sfm = SelectFromModel(estimator = clf, threshold=None, prefit=False,
norm_order=1, max_features = 12)
sfm.fit(D[allfeats], y)
modfeats = sfm.get_support()
@3catz
3catz / vetting_ga.py
Created October 28, 2020 19:46
Vetting of GA feature subsets
# Re-score every feature subset stored in `report` and collect the mean
# ROC AUC of each one in `scores` (same order as the rows of `report`).
#
# Reads `report`, `D` and `y` from the enclosing script. The second column of
# `report` is taken to hold the feature subsets (presumably "Chosen Feats";
# confirm against the code that builds `report`).

# Built once: neither object depends on the subset being evaluated, and
# cross_val_score clones the estimator before fitting it.
# Swap in RandomForestClassifier() here to vet with a different estimator.
clf = LogisticRegression(solver="liblinear", C=6, tol=1)
# 10 stratified folds repeated 100 times, i.e. 1000 fits per subset.
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=100)

scores = []
for myfeats in report.iloc[:, 1]:
    print(myfeats)
    X = D[myfeats]
    score = np.mean(cross_val_score(clf, X, y, cv=rskf, scoring="roc_auc"))
    scores.append(score)