Skip to content

Instantly share code, notes, and snippets.

@arnawldo
Last active June 13, 2017 15:13
Show Gist options
  • Save arnawldo/91612e80a3fb297b6c834d98f7f8b448 to your computer and use it in GitHub Desktop.
Skeleton for hw2
# uses python3
import sys
import numpy as np
# helper functions
def est_class_mean(x, y, label):
    """Given n * d numpy array of predictors, n * 1 array of labels and the
    class label of interest, return the empirical mean of each variable
    within that class.

    Input: x = n * d array
           y = n * 1 array
           label = int
    Output: d * 1 array
    """
    # Rows of x belonging to the requested class.
    subset_x = x[y == label, ]
    # Column-wise (per-variable) empirical mean.
    return subset_x.mean(axis=0)
def est_class_cov(x, y, label):
    """Given n * d numpy array of predictors, n * 1 array of labels and the
    class label of interest, return the empirical covariance matrix of the
    variables within that class.

    Input: x = n * d array
           y = n * 1 array
           label = int
    Output: d * d array
    """
    # Rows of x belonging to the requested class.
    subset_x = x[y == label, ]
    # rowvar=False treats columns as variables, matching the n * d layout.
    # NOTE: uses np.cov's default unbiased (n-1) normalization; a class with
    # a single sample yields NaNs.
    return np.cov(subset_x, rowvar=False)
def est_class_prior(y, label):
    """Given all training labels, return the estimated prior probability of
    a particular label.

    Input: y = n * 1 array
           label = int
    Output: float (fraction of training labels equal to `label`)
    """
    # Mean of the boolean mask == proportion of matching labels.
    return np.mean(y == label)
def est_class_prob(x, prior, cov, mean):
    """Given a single test observation, estimated class prior, covariance and
    mean array, output the (unnormalized) posterior: prior times the
    multivariate Gaussian density of x under N(mean, cov).

    Input: x = d * 1 array
           prior = float
           cov = d * d array
           mean = d * 1 array
    Output: prob = float
    """
    d = x.shape[0]
    # x - mu, as a column vector so the quadratic form below is well-shaped.
    diff = (x - mean).reshape(-1, 1)
    # Determinant of sigma (the "magnitude" referred to in the skeleton).
    det_cov = np.linalg.det(cov)
    # Quadratic form in the exponent: (x-mu)^T sigma^-1 (x-mu).
    quad = float(np.dot(np.dot(diff.T, np.linalg.inv(cov)), diff))
    # Gaussian normalizing constant: ((2*pi)^d * |sigma|)^(-1/2).
    norm_const = 1.0 / np.sqrt(((2.0 * np.pi) ** d) * det_cov)
    prob = prior * norm_const * np.exp(-0.5 * quad)
    return prob
def calc_all_prob(x_train, y_train, x_test):
    """Given training predictors and labels, calculate normalized Gaussian
    class-conditional posterior probabilities for each test observation.

    Input: x_train = n * d array
           y_train = n * 1 array
           x_test = (n_test_cases) * d array
    Output: (n_test_cases) * (n_classes) array; each row sums to 1,
            columns ordered by sorted unique class label.
    """
    # All unique class labels, in sorted order (fixes the column order).
    class_labels = sorted(np.unique(y_train))

    # Per-class parameter estimates, aligned with class_labels.
    class_predictor_means = []
    class_predictor_cov = []
    class_priors = []
    for cls in class_labels:
        class_predictor_means.append(est_class_mean(x_train, y_train, cls))
        class_predictor_cov.append(est_class_cov(x_train, y_train, cls))
        class_priors.append(est_class_prior(y_train, cls))

    # Unnormalized prior * likelihood for each (observation, class) pair.
    class_prob_predictions = []
    n_test_obs = x_test.shape[0]
    for i in range(n_test_obs):
        obs = x_test[i, ].reshape(-1, )  # current test observation as 1-D
        predicted_probs = [
            est_class_prob(obs,
                           class_priors[k],
                           class_predictor_cov[k],
                           class_predictor_means[k])
            for k in range(len(class_labels))
        ]
        class_prob_predictions.append(predicted_probs)

    class_prob_predictions = np.array(class_prob_predictions)
    # Normalize each row by its sum (Bayes-rule denominator) so that each
    # row of class probabilities sums to 1.
    row_sums = class_prob_predictions.sum(axis=1, keepdims=True)
    return class_prob_predictions / row_sums
if __name__ == "__main__":
    # Command-line args: training predictors, training labels, test predictors.
    x_train_file, y_train_file, x_test_file = sys.argv[1:]

    # Load the comma-separated input matrices.
    x_tr = np.loadtxt(x_train_file, delimiter=',')
    y_tr = np.loadtxt(y_train_file, delimiter=',')
    x_te = np.loadtxt(x_test_file, delimiter=',')

    # Estimate class probabilities for the test set and save them as CSV.
    predictions = calc_all_prob(x_tr, y_tr, x_te)
    np.savetxt("probs_test.csv", predictions, delimiter=',')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment