Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View ychennay's full-sized avatar

Yu Chen ychennay

View GitHub Profile
# Cast the categorical columns to the pandas "category" dtype in one call,
# driven by a column-name -> dtype mapping.
convert_dict = dict.fromkeys(("gender", "interest", "age"), "category")
conversions_df = conversions_df.astype(convert_dict)

# Build dummy (indicator) features for the categorical variables;
# drop_first=True omits the first level of each encoded column.
dummified_data = pd.get_dummies(conversions_df, drop_first=True)

# Gender is the classification target ("gender_M" is presumably the dummy
# column generated above for the gender feature).
TARGET = ["gender_M"]
from typing import List
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
def bootstrap_fit(X: np.ndarray, y: np.array, model_class, B=200, **model_params) -> List[BaseEstimator]:
"""
Fits and returns a list of B models based upon bootstrap sampling with replacement
:param X: features to fit model on
:param y: target (continuous for regression
from typing import List
import numpy as np
from sklearn.base import BaseEstimator
def bootstrap_contour_predict(bootstrapped_models: List[BaseEstimator], xx: np.array, yy: np.array) -> np.ndarray:
"""
Makes a prediction for len(xx) * len(yy) data points - a mesh grid
:param bootstrapped_models: a list of fitted sklearn estimators
:param xx: Numpy array of values from 1st dimension mesh axis
:param yy: Numpy array of values from 2nd dimension mesh axis
from sklearn.ensemble import RandomForestClassifier
# Fit a random forest with B trees on X / y (both defined outside this snippet)
# and allocate the all-zero N x N matrix that will hold pairwise proximities.
B = 500
rf = RandomForestClassifier(n_estimators=B)
rf.fit(X, y)
# apply() runs each data point through every fitted tree in the forest and records the index of the terminal leaf node it ends up in
final_positions = rf.apply(X) # final positions will be N x 500 (N rows, one per data point, and 500 columns, one per tree)
proximity_matrix = np.zeros((len(X), len(X))) # proximity matrix is N x N, initialised to zeros here
# NOTE(review): the matrix is presumably filled in by code that follows this snippet (not shown).
# The implementation is adapted from:
# https://stackoverflow.com/questions/18703136/proximity-matrix-in-sklearn-ensemble-randomforestclassifier
from sklearn.manifold import SpectralEmbedding
import seaborn as sns
# Everything that does not depend on `threshold` is computed once up front
# rather than on each of the seven loop iterations; previously the spectral
# embedding was re-fitted on the same proximity matrix for every plot.

# Marker sizes derived from the "Spent" column (squared, plus a constant offset
# of 10 -- presumably so that rows with zero spend still get a visible marker).
sizes = conversions_df["Spent"].values ** 2 + 10

# Project the precomputed proximity matrix down to two dimensions. Fitting once
# also means every plot below shares exactly the same point coordinates.
mds = SpectralEmbedding(n_components=2, affinity="precomputed")
reduced_dimensions = mds.fit_transform(proximity_matrix)

# One scatter plot per conversion threshold (1 through 7): points are coloured
# by whether the row has at least `threshold` approved conversions.
for threshold in range(1, 8):
    nonzeros = conversions_df["Approved_Conversion"] >= threshold
    sns.scatterplot(
        x=reduced_dimensions[:, 0],
        y=reduced_dimensions[:, 1],
        hue=nonzeros,
        alpha=0.5,
        legend=False,
        size=sizes,
    )
    plt.title(f"Spectral Embedding Visualization of \n Random Forest Proximity Matrix ({threshold} or More Conversions)")
@ychennay
ychennay / public-key.py
Created August 25, 2019 02:25
Public-Key Encryption Exchange and Encryption Algorithm Example
from math import sqrt
import base64
from itertools import count, islice
import random
from typing import List
from dataclasses import dataclass
from Crypto.Cipher import AES
import hashlib
from Crypto.Random import get_random_bytes
@ychennay
ychennay / customer_side_encryption.java
Created November 10, 2019 22:16
AWS CMK Encryption for S3
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.RegionUtils;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.kms.AWSKMS;
import com.amazonaws.services.kms.AWSKMSClientBuilder;
import com.amazonaws.services.kms.model.CreateKeyResult;
import com.amazonaws.services.s3.AmazonS3Encryption;
import com.amazonaws.services.s3.AmazonS3EncryptionClientBuilder;
@ychennay
ychennay / encodings.py
Last active November 27, 2019 01:29
encodings_demonstration.py
import re
from typing import List
def get_binary_for_char(char: str, encoding="utf-8") -> str:
"""
Encodes a character using the desired encoding into its corresponding hex, then converts the
hex code into binary, formatted with tab spaces between byte marks.
"""
@ychennay
ychennay / generators_send_example.py
Last active December 29, 2019 20:49
Simple example highlighting the execution of concurrent function calls (a top-level main function and a generator function) that yield control back and forth and send messages to each other.
# helper class taken from https://www.geeksforgeeks.org/print-colors-python-terminal/
class colors:
'''Colors class:reset all colors with colors.reset; two
sub classes fg for foreground
and bg for background; use as colors.subclass.colorname.
i.e. colors.fg.red or colors.bg.green. Also, the generic bold, disable,
underline, reverse, strike through,
and invisible work with the main class i.e. colors.bold'''
@ychennay
ychennay / flask_model.py
Created February 22, 2020 18:22
Simple Flask Data Model for a Student
# Import Flask-SQLAlchemy from its own top-level package. The legacy
# `flask.ext.<name>` import redirect was removed in Flask 1.0, so
# `from flask.ext.sqlalchemy import SQLAlchemy` raises ImportError on
# current Flask releases.
from flask_sqlalchemy import SQLAlchemy

# Shared SQLAlchemy handle that the models below subclass via db.Model.
# It is created without an app argument, so it presumably gets bound to the
# Flask application elsewhere (e.g. db.init_app(app)) -- not shown here.
db = SQLAlchemy()
class Student(db.Model):
student_id = db.Column(db.Integer, primary_key=True)
first_name = db.Column(db.String(80))
last_name = db.Column(db.String(80))
enrolled_school = db.Column(db.String(30))