Michael Gao michaelgao8

## gist:2478810e168e24505241c65b9cb5467d
# Root-logger setup: timestamped records with the level name left-aligned and
# padded to 8 characters.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)
# basicConfig() does nothing when the root logger already has handlers, so the
# level is also set directly on the root logger.
logging.getLogger().setLevel(logging.INFO)

## custom_scaler.py
class CustomScaler(BaseEstimator,TransformerMixin):
    """Fit a ``StandardScaler`` on a chosen subset of DataFrame columns.

    Inspired by https://stackoverflow.com/a/41461843/6248179

    Parameters
    ----------
    columns
        Column label(s) used as ``X.loc[:, columns]`` to select the data that
        is handed to the wrapped scaler.
    copy, with_mean, with_std
        Forwarded unchanged to ``StandardScaler``.
    """

    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        # Keep every constructor argument as a same-named attribute:
        # BaseEstimator.get_params() (and therefore clone() and repr())
        # reads them back with getattr().
        self.columns = columns
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std
        # StandardScaler's parameters are keyword-only in current
        # scikit-learn releases, so pass them by name, not by position.
        self.scaler = StandardScaler(
            copy=copy, with_mean=with_mean, with_std=with_std
        )

    def fit(self, X, y=None):
        """Fit the wrapped scaler on the selected columns of ``X``.

        Returns ``self`` so calls can be chained.
        """
        self.scaler.fit(X.loc[:, self.columns].values, y)
        return self

## gist:616f692d522a6dd34922e4f531b6ba63
# SOURCE: https://news.ycombinator.com/item?id=21260001

replace nvl with coalesce

replace rownum <= 1 with LIMIT 1

replace listagg with string_agg

replace recursive hierarchy (start with/connect by/prior) with a recursive CTE (WITH RECURSIVE)

## multi_index_aggregation.py
    def featurize_num_prior_encounters_multi_index(id_col, time_col, period_in_days, df):
        """Collect, per ``id_col`` group, a filtered row count and the group key.

        Adds a ``'start_col'`` column to ``df`` (``df[time_col]`` minus
        ``period_in_days`` days), re-indexes the frame by
        ``(id_col, time_col)`` and walks the groups of the ``id_col`` level,
        appending one count to ``num_adm`` and one key to ``id_list`` per group.

        NOTE(review): no ``return`` is visible after the loop, so the two lists
        are not handed back by the code shown here -- presumably the rest of
        the function is missing from this excerpt.
        """
        start_col = 'start_col'
        # Start of the look-back window; this assignment adds the column to the
        # DataFrame object the caller passed in.
        df[start_col] = df[time_col] - pd.Timedelta(days = period_in_days)
        # set multi_index
        df = df.set_index([id_col, time_col])
        num_adm = []
        id_list = []
        for i, (idx, data) in enumerate(df.groupby(level = id_col)):
            # NOTE(review): this compares each row's own time (index level 1)
            # with that same row's start_col, i.e. time > time - period --
            # confirm a row-wise comparison is what was intended.
            num_adm.append(data.loc[data.index.get_level_values(1) > data['start_col']].shape[0])
            id_list.append(idx)

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                michaelgao8
                / keybase.md
            
            
              Created
              September 9, 2019 17:41
            
          
    Keybase proof

I hereby claim:

I am michaelgao8 on github.
I am michaelgao8 (https://keybase.io/michaelgao8) on keybase.
I have a public key ASD0gvLDXyHs3rX3JLhkC09CLKU7q2HrdvMna8mHPC8qMwo

To claim this, I am signing this object:

  
## inspect_df.py
def inspect_df(DataFrame):
    """
    Print a quick preview of a DataFrame (drop-in helper for easier grading).

    input: pd.DataFrame of interest
    """
    header, divider, shape_label = "Head: ", " ======================== ", "Shape: "

    print(header)
    preview = DataFrame.head()
    print(preview)

    for line in (divider, shape_label):
        print(line)

## start_notebook.sh
# Start a detached jupyter/datascience-notebook container (port 8888 published,
# the local Projects directory mounted at /home/jovyan/work), wait five seconds,
# then list the notebook servers running inside that container.
hash=$(docker run --detach --publish 8888:8888 \
        --volume /Users/michael/Projects:/home/jovyan/work \
        jupyter/datascience-notebook jupyter notebook) \
    && sleep 5 \
    && docker exec "$hash" jupyter notebook list

## cross_validate_xgboost.py
def cross_validate_xgboost(train_data, train_output,
                           n_folds, param_grid,
                           type_dict,
                           fixed_param_dict = {'objective': 'binary:logistic', 'eval_metric': ['auc']},
                           metric_func_dict = {'auc': sklearn.metrics.roc_auc_score},
                           other_metrics_dict = None, keep_data = True, **kwargs):

    """
    Perform k-fold cross-validation with xgboost hyperparameters
    Get the average performance across folds and save all of the results

## clear_notebook.sh
# Clear every cell output in Notebook.ipynb, rewriting the file in place.
jupyter nbconvert \
    --inplace \
    --ClearOutputPreprocessor.enabled=True \
    Notebook.ipynb

## expanded_grid.py
import numpy as np

# Four input lists of candidate values.
a, b, c, d = [1, 2, 3], [3, 4, 5], [6, 7], [8, 9, 0]

# Desired:
# Every possible combination that takes one value from each of the 4 lists.
	# Root-logger setup: timestamped records with the level name left-aligned
	# and padded to 8 characters.
	logging.basicConfig(
	    level=logging.INFO,
	    datefmt='%Y-%m-%d %H:%M:%S',
	    format='%(asctime)s %(levelname)-8s %(message)s',
	)
	# basicConfig() does nothing when the root logger already has handlers, so
	# the level is also set directly on the root logger.
	logging.getLogger().setLevel(logging.INFO)
	class CustomScaler(BaseEstimator,TransformerMixin):
	    """Fit a ``StandardScaler`` on a chosen subset of DataFrame columns.

	    Inspired by https://stackoverflow.com/a/41461843/6248179

	    Parameters
	    ----------
	    columns
	        Column label(s) used as ``X.loc[:, columns]`` to select the data
	        that is handed to the wrapped scaler.
	    copy, with_mean, with_std
	        Forwarded unchanged to ``StandardScaler``.
	    """

	    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
	        # Keep every constructor argument as a same-named attribute:
	        # BaseEstimator.get_params() (and therefore clone() and repr())
	        # reads them back with getattr().
	        self.columns = columns
	        self.copy = copy
	        self.with_mean = with_mean
	        self.with_std = with_std
	        # StandardScaler's parameters are keyword-only in current
	        # scikit-learn releases, so pass them by name, not by position.
	        self.scaler = StandardScaler(
	            copy=copy, with_mean=with_mean, with_std=with_std
	        )

	    def fit(self, X, y=None):
	        """Fit the wrapped scaler on the selected columns of ``X``.

	        Returns ``self`` so calls can be chained.
	        """
	        self.scaler.fit(X.loc[:, self.columns].values, y)
	        return self
	# SOURCE: https://news.ycombinator.com/item?id=21260001

	replace nvl with coalesce

	replace rownum <= 1 with LIMIT 1

	replace listagg with string_agg

	replace recursive hierarchy (start with/connect by/prior) with a recursive CTE (WITH RECURSIVE)
	def featurize_num_prior_encounters_multi_index(id_col, time_col, period_in_days, df):
	    """Collect, per ``id_col`` group, a filtered row count and the group key.

	    Adds a ``'start_col'`` column to ``df`` (``df[time_col]`` minus
	    ``period_in_days`` days), re-indexes the frame by
	    ``(id_col, time_col)`` and walks the groups of the ``id_col`` level,
	    appending one count to ``num_adm`` and one key to ``id_list`` per group.

	    NOTE(review): no ``return`` is visible after the loop, so the two lists
	    are not handed back by the code shown here -- presumably the rest of
	    the function is missing from this excerpt.
	    """
	    start_col = 'start_col'
	    # Start of the look-back window; this assignment adds the column to the
	    # DataFrame object the caller passed in.
	    df[start_col] = df[time_col] - pd.Timedelta(days=period_in_days)
	    # set multi_index
	    df = df.set_index([id_col, time_col])
	    num_adm = []
	    id_list = []
	    for idx, data in df.groupby(level=id_col):
	        # NOTE(review): this compares each row's own time (index level 1)
	        # with that same row's start_col, i.e. time > time - period --
	        # confirm a row-wise comparison is what was intended.
	        in_window = data.index.get_level_values(1) > data[start_col]
	        num_adm.append(data.loc[in_window].shape[0])
	        id_list.append(idx)
	def inspect_df(DataFrame):
	    """
	    Drop-in code for easier grading: print a quick preview of a DataFrame.

	    input: pd.DataFrame of interest
	    """
	    print("Head: ")
	    print(DataFrame.head())

	    print(" ======================== ")
	    print("Shape: ")
	def cross_validate_xgboost(train_data, train_output,
	n_folds, param_grid,
	type_dict,
	fixed_param_dict = {'objective': 'binary:logistic', 'eval_metric': ['auc']},
	metric_func_dict = {'auc': sklearn.metrics.roc_auc_score},
	other_metrics_dict = None, keep_data = True, **kwargs):

	"""
	Perform k-fold cross-validation with xgboost hyperparameters
	Get the average performance across folds and save all of the results
	import numpy as np

	# Four input lists of candidate values.
	a, b, c, d = [1, 2, 3], [3, 4, 5], [6, 7], [8, 9, 0]

	# Desired:
	# Every possible combination that takes one value from each of the 4 lists.