This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a Gaussian process from one initial data point: input (0, 0) with
# target 1.2.  (The original lines carried trailing table-cell residue that
# made them a syntax error; it has been removed.)
X = np.array([[0, 0]])
y = np.array([[1.2]])
gp = GaussianProcess(X, y)

# Feed the remaining data points to the model one at a time.
gp.update([[1.5, -1.5]], [[2.3]])  # second data point
gp.update([[-2, 1.5]], [[-1.0]])  # third data point
gp.update([[2.1, 1.3]], [[-0.6]])  # fourth data point

delta = 0.05  # changes granularity of the contour map
x = np.arange(-3.0, 3.0, delta)
# NOTE: `y` is rebound here; it no longer refers to the training target above.
y = np.arange(-2.0, 2.0, delta)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import time | |
from sklearn.metrics.pairwise import rbf_kernel | |
x = np.random.rand(10000) | |
x = x.reshape(100,100) | |
distances = [] | |
γ = -.5 | |
start = time.time() | |
for i in x: | |
for j in x: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.api as sm | |
import numpy as np | |
from sklearn.linear_model import LinearRegression | |
# Closed-form ordinary least squares with NumPy: solve the normal equations
# (XᵀX) β = Xᵀy.  Solving the linear system directly gives the same
# coefficients as multiplying by an explicit inverse, but is cheaper and
# numerically better behaved.
_design = np.asarray(X, dtype=float)
_targets = np.asarray(y, dtype=float)
β = np.linalg.solve(_design.T @ _design, _design.T @ _targets).flatten()

# get sklearn's LinearRegression implementation weights
lr = LinearRegression()
sklearn_coefficients = lr.fit(X, y).coef_.flatten()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit 200 ridge-regression models, one per λ value evenly spaced over [0, 50],
# and record two summary statistics of each model's in-sample predictions.
lambdas = np.linspace(0, 50, 200)
bias = []
variance = []

# These pieces of the ridge normal equations (XᵀX + λI) β = Xᵀy do not depend
# on λ, so compute them once instead of on every iteration.
_design = np.asarray(X, dtype=float)
_targets = np.asarray(y, dtype=float).ravel()
_gram = _design.T @ _design
_moment = _design.T @ _targets
_identity = np.eye(_gram.shape[0])  # one penalty term per feature column

for λ in lambdas:
    # refit model with new λ hyperparameter; solving the system avoids
    # forming an explicit matrix inverse
    β = np.linalg.solve(_gram + λ * _identity, _moment)
    _predictions = _design @ β
    # "bias" here is the mean absolute difference between predictions and y
    bias.append(np.mean(np.abs(_predictions - _targets)))
    # "variance" here is the variance of the predictions themselves
    variance.append(np.var(_predictions))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class RandomForestRegressor(ForestRegressor): | |
def __init__(self, | |
n_estimators='warn', criterion="mse", max_depth=None, min_samples_split=2, min_samples_leaf=1, | |
min_weight_fraction_leaf=0.,max_features="auto",max_leaf_nodes=None,min_impurity_decrease=0., | |
min_impurity_split=None,bootstrap=True,oob_score=False,n_jobs=None,random_state=None,verbose=0, | |
warm_start=False): | |
super().__init__( | |
base_estimator=DecisionTreeRegressor(), | |
... # pass through to ForestRegressor parent class many other constructor parameters omitted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the recorded bias values against λ on the left-hand y-axis, drawn in
# red, then create a second y-axis that shares the same x-axis.
fig, ax1 = plt.subplots()

color = 'tab:red'
ax1.set_xlabel('λ (Inverse Model Complexity)')
ax1.set_ylabel('Prediction Bias', color=color)
ax1.plot(lambdas, bias, color=color)
ax1.tick_params(axis='y', labelcolor=color)  # tick labels match the line colour

ax2 = ax1.twinx()  # twin axes: independent y-scale, shared x-axis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bootstrap the median of the "Impressions" column.  For each b in 1..499,
# draw b resamples (with replacement, same size as the data), take the median
# of each, and record how the mean and variance of those medians behave.
_impressions = X["Impressions"]  # only this column is needed for resampling
_n_rows = len(_impressions)

# Reference value: the median of the observed data.
true_median = np.median(_impressions)
errors = []
variances = []
for b in range(1, 500):
    sample_medians = [
        np.median(_impressions.sample(_n_rows, replace=True)) for _ in range(b)
    ]
    error = np.mean(sample_medians) - true_median
    variances.append(np.var(sample_medians))  # record the variance of the bootstrap estimates
    errors.append(error)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
corr = X.corr()

# randomly pick (up to) 30 of the interest columns to drop to make the
# visualization more readable; the cap keeps np.random.choice from raising
# when fewer than 30 columns match the pattern
_interest_columns = X.filter(regex='interest_1?[0-9]{2}').columns
drops = np.random.choice(_interest_columns,
                         size=min(30, len(_interest_columns)),
                         replace=False)

# Remove the chosen labels from both axes in one step, so the matrix stays
# square and no in-place mutation of a filtered frame is needed.
corr = corr.drop(index=drops, columns=drops)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use a column -> dtype mapping to convert these columns to the categorical
# data type.
convert_dict = {
    'gender': "category",
    'interest': "category",
    "age": "category",
}
conversions_df = conversions_df.astype(convert_dict)

# get dummy features for categorical variables; drop_first=True omits the
# first level of each categorical column
dummified_data = pd.get_dummies(conversions_df, drop_first=True)

# make gender the target variable for classification
# NOTE(review): assumes get_dummies produced a "gender_M" column — confirm
# against the category levels present in the data.
TARGET = ["gender_M"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def plot_decision_boundaries(X, y, model_class, bootstrap=False, | |
x_label=None, y_label=None, title=None, **model_params): | |
# adapted from https://gist.github.com/anandology/772d44d291a9daa198d4 | |
reduced_data = X[:, :2] # take only the first two feature columns (since we are plotting a 2D contour map) | |
# we need to recombine the data and target together since we need | |
# to bootstrap sample from them with replacement |
OlderNewer