I hereby claim:
- I am michaelgao8 on github.
- I am michaelgao8 (https://keybase.io/michaelgao8) on keybase.
- I have a public key ASD0gvLDXyHs3rX3JLhkC09CLKU7q2HrdvMna8mHPC8qMwo
To claim this, I am signing this object:
# Configure the root logger: timestamped, level-aligned messages at INFO.
# logging.basicConfig() does nothing when the root logger already has a
# handler (some host environments install one before user code runs), so the
# level is also set explicitly on the root logger afterwards.
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S')
logging.getLogger().setLevel(logging.INFO)
class CustomScaler(BaseEstimator, TransformerMixin):
    """Standardize only a chosen subset of DataFrame columns.

    Inspired by https://stackoverflow.com/a/41461843/6248179

    Parameters
    ----------
    columns
        Column label(s) used as ``X.loc[:, columns]`` to pick the data that
        is handed to the wrapped scaler.
    copy, with_mean, with_std
        Forwarded unchanged to the wrapped ``StandardScaler``.
    """

    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        # Keep every constructor argument as a same-named attribute so that
        # BaseEstimator.get_params() / clone() can read them back.
        self.columns = columns
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std
        # StandardScaler's options are keyword-only in current scikit-learn;
        # passing them positionally raises TypeError.
        self.scaler = StandardScaler(
            copy=copy, with_mean=with_mean, with_std=with_std)

    def fit(self, X, y=None):
        """Fit the wrapped scaler on the selected columns of ``X``.

        ``y`` is passed through to the wrapped scaler's ``fit``.
        Returns ``self`` so calls can be chained.
        """
        self.scaler.fit(X.loc[:, self.columns].values, y)
        return self
# SOURCE: https://news.ycombinator.com/item?id=21260001 | |
replace nvl with coalesce | |
replace rownum <= 1 with LIMIT 1 | |
replace listagg with string_agg | |
replace recursive hierarchy (start with/connect by/prior) with a recursive CTE (WITH RECURSIVE) |
def featurize_num_prior_encounters_multi_index(id_col, time_col, period_in_days, df): | |
start_col = 'start_col' | |
df[start_col] = df[time_col] - pd.Timedelta(days = period_in_days) | |
# set multi_index | |
df = df.set_index([id_col, time_col]) | |
num_adm = [] | |
id_list = [] | |
for i, (idx, data) in enumerate(df.groupby(level = id_col)): | |
num_adm.append(data.loc[data.index.get_level_values(1) > data['start_col']].shape[0]) | |
id_list.append(idx) |
I hereby claim:
To claim this, I am signing this object:
def inspect_df(DataFrame): | |
""" | |
Drop-in code for easier grading | |
input: pd.DataFrame of interest | |
""" | |
print("Head: ") | |
print(DataFrame.head()) | |
print(" ======================== ") | |
print("Shape: ") |
hash=$(docker run -d -p 8888:8888 -v /Users/michael/Projects:/home/jovyan/work jupyter/datascience-notebook jupyter notebook) && sleep 5 && docker exec "$hash" jupyter notebook list |
def cross_validate_xgboost(train_data, train_output, | |
n_folds, param_grid, | |
type_dict, | |
fixed_param_dict = {'objective': 'binary:logistic', 'eval_metric': ['auc']}, | |
metric_func_dict = {'auc': sklearn.metrics.roc_auc_score}, | |
other_metrics_dict = None, keep_data = True, **kwargs): | |
""" | |
Perform k-fold cross-validation with xgboost hyperparameters | |
Get the average performance across folds and save all of the results |
jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace Notebook.ipynb |
import numpy as np | |
a = [1,2,3] | |
b = [3,4,5] | |
c = [6,7] | |
d = [8,9,0] | |
# Desired: | |
# All possible combinations of these 4 values. |