This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cast the gender, interest and age columns to pandas' "category" dtype.
# The mapping is column name -> target dtype, as accepted by DataFrame.astype.
convert_dict = dict.fromkeys(("gender", "interest", "age"), "category")
conversions_df = conversions_df.astype(convert_dict)

# Replace the categorical columns with dummy (indicator) features.
# drop_first=True drops the first level of each encoded column.
dummified_data = pd.get_dummies(conversions_df, drop_first=True)

# Gender is the target variable for classification; "gender_M" is presumably
# the indicator column get_dummies emits for the "M" level of gender.
TARGET = ["gender_M"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
import numpy as np | |
import pandas as pd | |
from sklearn.base import BaseEstimator | |
def bootstrap_fit(X: np.ndarray, y: np.array, model_class, B=200, **model_params) -> List[BaseEstimator]: | |
""" | |
Fits and returns a list of B models based upon bootstrap sampling with replacement | |
:param X: features to fit model on | |
:param y: target (continuous for regression |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
import numpy as np | |
from sklearn.base import BaseEstimator | |
def bootstrap_contour_predict(bootstrapped_models: List[BaseEstimator], xx: np.array, yy: np.array) -> np.ndarray: | |
""" | |
Makes a prediction for len(xx) * len(yy) data points - a mesh grid | |
:param bootstrapped_models: a list of fitted sklearn estimators | |
:param xx: Numpy array of values from 1st dimension mesh axis | |
:param yy: Numpy array of values from 2nd dimension mesh axis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.ensemble import RandomForestClassifier

# Number of trees grown in the forest.
B = 500

# fit() returns the fitted estimator itself, so construction and fitting chain.
rf = RandomForestClassifier(n_estimators=B).fit(X, y)

# apply() runs each data point through every tree of the forest and records
# the index of the terminal leaf node it ends up in. The result is N x 500:
# N rows (one per data point) and 500 columns (one per tree).
final_positions = rf.apply(X)

# The proximity matrix is N x N and starts out as all zeros.
proximity_matrix = np.zeros((len(X),) * 2)
# I've adapted implementation found here: | |
# https://stackoverflow.com/questions/18703136/proximity-matrix-in-sklearn-ensemble-randomforestclassifier |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.manifold import SpectralEmbedding | |
import seaborn as sns | |
for threshold in range(1,8): | |
nonzeros = (conversions_df["Approved_Conversion"] >= threshold) | |
sizes = conversions_df["Spent"].values ** 2 + 10 | |
mds = SpectralEmbedding(n_components=2, affinity="precomputed") | |
reduced_dimensions = mds.fit_transform(proximity_matrix) | |
sns.scatterplot(x=reduced_dimensions[:,0], y=reduced_dimensions[:, 1], | |
hue=nonzeros, alpha=0.5, legend=False, size=sizes) | |
plt.title(f"Spectral Embedding Visualization of \n Random Forest Proximity Matrix ({threshold} or More Conversions)") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt | |
import base64 | |
from itertools import count, islice | |
import random | |
from typing import List | |
from dataclasses import dataclass | |
from Crypto.Cipher import AES | |
import hashlib | |
from Crypto.Random import get_random_bytes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.amazonaws.AmazonServiceException; | |
import com.amazonaws.SdkClientException; | |
import com.amazonaws.auth.profile.ProfileCredentialsProvider; | |
import com.amazonaws.regions.RegionUtils; | |
import com.amazonaws.regions.Regions; | |
import com.amazonaws.services.kms.AWSKMS; | |
import com.amazonaws.services.kms.AWSKMSClientBuilder; | |
import com.amazonaws.services.kms.model.CreateKeyResult; | |
import com.amazonaws.services.s3.AmazonS3Encryption; | |
import com.amazonaws.services.s3.AmazonS3EncryptionClientBuilder; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from typing import List | |
def get_binary_for_char(char: str, encoding="utf-8") -> str: | |
""" | |
Encodes a character using the desired encoding into its corresponding hex, then converts the | |
hex code into binary, formatted with tab spaces between byte marks. | |
""" | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# helper class taken from https://www.geeksforgeeks.org/print-colors-python-terminal/ | |
class colors: | |
'''Colors class: reset all colors with colors.reset; two | |
subclasses, fg for foreground | |
and bg for background; use as colors.subclass.colorname, | |
i.e. colors.fg.red or colors.bg.green. Also, the generic bold, disable, | |
underline, reverse, strike through, | |
and invisible work with the main class, i.e. colors.bold''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import Flask-SQLAlchemy from its own package. The old ``flask.ext.*``
# import redirect was removed in Flask 1.0, so ``flask.ext.sqlalchemy``
# raises an import error on any current Flask release. ``flask_sqlalchemy``
# is the same extension and is also importable on older Flask versions.
from flask_sqlalchemy import SQLAlchemy

# Extension object created without an application; the models in this file
# subclass ``db.Model``.
# NOTE(review): no app is passed here, so it presumably has to be bound
# elsewhere (e.g. via ``db.init_app(app)``) - confirm against the app setup.
db = SQLAlchemy()
class Student(db.Model): | |
student_id = db.Column(db.Integer, primary_key=True) | |
first_name = db.Column(db.String(80)) | |
last_name = db.Column(db.String(80)) | |
enrolled_school = db.Column(db.String(30)) |