Skip to content

Instantly share code, notes, and snippets.

View kbfreder's full-sized avatar

Kendra kbfreder

View GitHub Profile
@kbfreder
kbfreder / evaluating_classifiers.py
Created February 11, 2020 19:54
Comparing classifiers
# this is a custom module
import assess_clf_models as acm
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, BaggingClassifier
from catboost import CatBoostClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
@kbfreder
kbfreder / tree_depths.py
Created February 11, 2020 22:15
Retrieve tree depths
# Prerequisite: RandomForestClassifier (sklearn.ensemble) is already imported,
# and train_X / train_y hold the training features and labels.
# Fit a forest with max_depth=None, then read the depth of every fitted tree.
base_rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=None,
)
base_rf.fit(train_X, train_y)
depths = [
    fitted_tree.tree_.max_depth
    for fitted_tree in base_rf.estimators_
]
@kbfreder
kbfreder / grid_cv.py
Created February 11, 2020 22:40
Using sklearn's GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
# import Random Forest
# Candidate hyperparameter values, keyed by estimator parameter name
# (intended to be passed to GridSearchCV as its parameter grid).
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 15, 20],
    'criterion': ['gini', 'entropy'],
    # A split is only made when it lowers impurity by at least this amount.
    # The thresholds therefore have to be tiny (1e-7, 1e-5); values such as
    # 1e7 / 1e5 can never be reached and would suppress every split.
    'min_impurity_decrease': [0, 1e-7, 1e-5],
}
# accuracy_score is not among the imports above, so bring it into scope
# before wrapping it as a scorer object.
from sklearn.metrics import accuracy_score
scorer = make_scorer(accuracy_score)
# import function, or patch it:
# Note: may need to install mlxtend
try:
from sklearn.inspection import permutation_importance
except ImportError:
print("Problem importing permutation_importance -- patching")
from mlxtend.evaluate import feature_importance_permutation
def permutation_importance(estimator, X, y, scoring='r2', n_repeats=5):
"""
@kbfreder
kbfreder / logging.py
Last active February 26, 2021 15:01
basic logging
import logging
# REFERENCES
# logging message attributes: https://docs.python.org/3/library/logging.html#logrecord-attributes
# datetime formatting: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
# ---------------------------------------------
# BASIC LOGGING
# ---------------------------------------------
@kbfreder
kbfreder / timing.py
Last active February 14, 2020 20:17
time how long your code takes to run
# BASIC
# ----------------------------------------------------------------------
# elapsed time
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump (forwards or
# backwards) if the system clock is adjusted while the code is running.
start_time = time.perf_counter()
'''code to execute'''
end_time = time.perf_counter()
# report the interval in minutes, to one decimal place
print('Elapsed time: {:.1f} min'.format((end_time - start_time) / 60))
@kbfreder
kbfreder / bash_loop_dates.sh
Last active March 12, 2020 22:12
Loop through dates in a bash script. Also uses a while loop and if/then statements (including negated conditions).
#!/bin/bash
# loop through dates, using GNU date
# GNU date reference: https://www.gnu.org/software/coreutils/manual/html_node/Examples-of-date.html
# define paths
open_path=../data/raw
save_path=../data/processed
# define dates
start_date=2020-02-25
@kbfreder
kbfreder / argparse.py
Last active March 10, 2020 20:21
because who doesn't love a good script
import argparse
import config_file as cfg # a config file
# I like to have 'main' as a stand-alone function, so I have the option
# of importing this file as a module and calling main directly
def main(*args):
    """Run the script's work.

    Kept as a stand-alone function so this file can be imported as a module
    and ``main`` called directly.

    Args:
        *args: Positional arguments; accepted but not used by the visible body.
    """
    # in which we do the things
    print('Running main function')
@kbfreder
kbfreder / bash_args.sh
Last active October 7, 2020 20:44
checks for arguments; assigns them to variables if present
#!/bin/bash
#--------------------------------------
# if no argument given, 'd' defaults to yesterday's date
# the `-z` test operator checks for an empty string
if [ -z "$1" ]
then
echo "No argument supplied; using yesterday's date"
d=$(date --date '-1 day' +%Y-%m-%d)
else
@kbfreder
kbfreder / keybase.md
Created June 9, 2020 16:03
Keybase gist

Keybase proof

I hereby claim:

  • I am kbfreder on github.
  • I am kbfreder (https://keybase.io/kbfreder) on keybase.
  • I have a public key ASBmBZ75k43sulYdxxEkyvL5njbPuAE2l01Q_wSlTIwwRQo

To claim this, I am signing this object: