Skip to content

Instantly share code, notes, and snippets.

View amansharma2910's full-sized avatar
🥳

Aman Sharma amansharma2910

🥳
View GitHub Profile
# scoring the from-scratch predictions against the held-out test labels
acc = accuracy(y_test, test_predictions)
# acc is multiplied by 100 for display -- presumably accuracy() returns a
# fraction in [0, 1]; confirm against its definition
print('Model accuracy (Scratch) = ', acc*100)
def accuracy(y_test, y_preds):
"""Calculates inference accuracy of the model.
Args-
y_test- Original target labels of the test set
y_preds- Predicted target labels
Returns-
acc
"""
total_correct = 0
# running inference on the test data
# (k = 5 is also KNNClassifier's default; it is passed explicitly here)
test_predictions = KNNClassifier(X_train, y_train, X_test, k = 5)
# bare expression: does nothing when run as a script; presumably left in to
# display the value as notebook cell output -- confirm
test_predictions
def KNNClassifier(train_features, train_target, test_features, k = 5):
"""Performs KNN classification on the test feature set.
Args-
train_features- This denotes the feature set of the training data
train_target- Target labels of the training data
test_features- Feature set of the test data; assumed unlabeled
k (default = 5)- Number of closest neighboring training data points to be considered
Returns-
predictions- Array of target predictions for each test data instance
"""
# trying the distance helper on a single test point (the first row of X_test)
dist_array = distance_from_all_training(X_test[0])
# bare expression: does nothing when run as a script; presumably left in to
# display the value as notebook cell output -- confirm
dist_array
def distance_from_all_training(test_point):
"""Calculates euclidean distance of test point from all the points in the training dataset
Args-
test_point- Data point from test set
Returns-
dist_array- Array holding distance values for all training data points
"""
dist_array = np.array([])
for train_point in X_train:
def euclidean_dist(pointA, pointB):
"""Calculates the euclidean distance between two vectors (numpy arrays).
Args-
pointA- First vector
pointB- Second vector
Returns-
distance- Euclidean distance between A and B
"""
distance = np.square(pointA - pointB) # (ai-bi)**2 for every point in the vectors
# running split function on our dataset
# (test_size = 0.2 is also train_test_split's default; the results are unpacked
# in the order train features, train target, test features, test target)
X_train, y_train, X_test, y_test = train_test_split(features_scaled, target, test_size = 0.2)
# printing the shape of the splits
# NOTE(review): this is a bare tuple expression, not a print() call, so it only
# shows output in an interactive / notebook session
X_train.shape, y_train.shape, X_test.shape, y_test.shape
def train_test_split(features, target, test_size = 0.2):
"""Splits dataset into training and testing sets.
Args-
features- The feature matrix of the dataset (numpy array)
target- The target array (numpy array)
test_size- Size of test dataset that you want
Returns-
train_features, train_target, test_features, test_target
# plotting the number of samples in each 'quality' class
# The column is passed as x= by keyword: seaborn 0.11 deprecates positional
# arguments here and seaborn 0.12+ makes everything after `data` keyword-only,
# so a positional column name collides with data=. The keyword form works on
# older versions as well.
sns.countplot(x='quality', data=wine_df)