Skip to content

Instantly share code, notes, and snippets.

View patrickbrus's full-sized avatar

patrickbrus

View GitHub Profile
@patrickbrus
patrickbrus / CAM.py
Created July 28, 2020 06:29
Function for computing CAM
def get_class_activation_map(model, img):
'''
this function computes the class activation map
Inputs:
1) model (tensorflow model) : trained model
2) img (numpy array of shape (224, 224, 3)) : input image
'''
# expand dimension to fit the image to a network accepted input size
@patrickbrus
patrickbrus / imputer.py
Created January 10, 2021 17:36
Function for imputing missing data
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
# Hold out 10% of data_df_converted (the dataframe containing all features)
# as the test set.
df_train, df_test = train_test_split(
    data_df_converted,
    test_size=0.10,
    random_state=42,
)
# Split the remaining training rows again, moving 30% of them into a
# validation set.
df_train, df_val = train_test_split(
    df_train,
    test_size=0.3,
    random_state=42,
)
# Build the iterative imputer model (max_iter=20, fixed seed, verbose=1).
imputer = IterativeImputer(
    max_iter=20,
    random_state=42,
    verbose=1,
)
@patrickbrus
patrickbrus / upload_to_S3.py
Created January 10, 2021 18:22
Code for uploading data to S3 bucket
import boto3
import sagemaker
import os
# SageMaker session and the execution role reported by the SageMaker SDK.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
# Region name taken from a default boto3 session.
boto_session = boto3.Session()
region = boto_session.region_name
@patrickbrus
patrickbrus / train_linear_regression.py
Created January 10, 2021 18:24
Notebook code for training linear regression model
from sagemaker.sklearn.estimator import SKLearn
FRAMEWORK_VERSION = "0.23-1"
script_path = 'source/train_linear_regression.py'
sklearn_linear_regression = SKLearn(
entry_point=script_path,
framework_version=FRAMEWORK_VERSION,
instance_type="ml.c4.xlarge",
role=role,
@patrickbrus
patrickbrus / decison_tree_and_bayes_optimizer.py
Last active January 10, 2021 18:30
Fit a Bayesian optimizer and a decision tree model
# Define a model: a decision tree regressor using the "mse" split criterion.
# NOTE(review): scikit-learn 1.0 renamed criterion="mse" to "squared_error"
# and 1.2 removed the old spelling — confirm against the installed version
# before upgrading.
tree = DecisionTreeRegressor(criterion="mse")
# Create the Bayesian optimization object
opt = BayesSearchCV(
tree,
{
"max_depth": (5, 15),
"splitter": ["best", "random"],
},
@patrickbrus
patrickbrus / random_forest_and_bayes_optimizer.py
Last active January 10, 2021 18:29
Fit a bayesian optimizer for a random forest
# Define a model: a random forest regressor using the "mse" split criterion.
# n_jobs is not defined in this snippet and must be set beforehand.
# NOTE(review): scikit-learn 1.0 renamed criterion="mse" to "squared_error"
# and 1.2 removed the old spelling — confirm against the installed version
# before upgrading.
forest = RandomForestRegressor(criterion="mse", n_jobs=n_jobs)
# Create the Bayesian optimization object
opt = BayesSearchCV(
forest,
{
"max_depth": (5, 15),
"n_estimators": (10, 50),
"bootstrap": [True, False]
@patrickbrus
patrickbrus / xgboost_sagemaker.py
Created January 10, 2021 18:34
Code for training an XGBoost estimator in SageMaker
from sagemaker.tuner import IntegerParameter
from sagemaker.tuner import ContinuousParameter
from sagemaker.tuner import HyperparameterTuner
from sagemaker.inputs import TrainingInput
# Define exploration boundaries (default suggested values from Amazon SageMaker Documentation)
hyperparameter_ranges = {
'alpha': ContinuousParameter(0, 1000, scaling_type="Auto"),
'eta': ContinuousParameter(0.1, 0.5, scaling_type='Logarithmic'),
'gamma':ContinuousParameter(0, 5, scaling_type='Auto'),
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Read the stroke dataset from its CSV file. The path uses a forward slash so
# that it resolves on Windows, Linux and macOS alike; a backslash separator is
# only understood as a directory separator on Windows.
df = pd.read_csv("data/healthcare-dataset-stroke-data.csv")
print(df.head())  # helpful as a first dive into the data and its features
# Boolean mask over df's columns: True where the column dtype is ``object``.
# The builtin ``object`` is used because the ``np.object`` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
is_object_column = (df.dtypes == object).values
# Object-dtype columns are treated as the categorical features to loop over.
list_categorical_features = df.columns[is_object_column].to_list()
# Every other column is treated as a numerical feature to loop over.
list_numerical_features = df.columns[~is_object_column].to_list()
# Plot one histogram for each numerical feature.
for numerical_feature in list_numerical_features:
    # Open a fresh figure per feature; without it every histogram is drawn
    # onto the same axes and only the last title remains visible.
    plt.figure()
    plt.title(f"Histogram of {numerical_feature}")
    plt.hist(df[numerical_feature])
# Stacked histplots with hue "stroke" to check the influence of age, gender
# and smoking_status (plotted in that order) on the class stroke.
for feature_name in ("age", "gender", "smoking_status"):
    sns.histplot(data=df, x=feature_name, hue="stroke", multiple="stack")
# here: it can also be interesting to print the percentage of stroke patients
# in each category