Skip to content

Instantly share code, notes, and snippets.

@lazuxd
lazuxd / 1.create_dataframes.py
Last active February 7, 2020 18:00
Building a Sentiment Classifier using Scikit-Learn
import pandas as pd
import re
from os import system, listdir
from os.path import isfile, join
from random import shuffle
# Download the aclImdb_v1 archive by invoking the external `wget` command.
system('wget "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"')
# Extract the gzip-compressed tarball with the external `tar` command.
# NOTE(review): the exit status returned by system() is ignored in both calls,
# so a failed download or extraction is not detected here.
system('tar -xzf "aclImdb_v1.tar.gz"')
def create_data_frame(folder: str) -> pd.DataFrame:
@lazuxd
lazuxd / 2.text_vectorization.py
Created February 7, 2020 17:55
Building a Sentiment Classifier using Scikit-Learn
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from joblib import dump, load # used for saving and loading sklearn objects
from scipy.sparse import save_npz, load_npz # used for saving and loading sparse matrices
from os import makedirs

# Create the output directories directly from Python rather than through a
# shell `mkdir`: this does not depend on the shell's quoting rules, and
# exist_ok=True makes re-running the script a silent no-op when the
# directories are already present.
makedirs('data_preprocessors', exist_ok=True)
makedirs('vectorized_data', exist_ok=True)
# Unigram Counts
@lazuxd
lazuxd / 3.choosing_data_format.py
Created February 7, 2020 17:57
Building a Sentiment Classifier using Scikit-Learn
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
import numpy as np
def train_and_show_scores(X: csr_matrix, y: np.array, title: str) -> None:
X_train, X_valid, y_train, y_valid = train_test_split(
X, y, train_size=0.75, stratify=y
)
@lazuxd
lazuxd / 4.hyper_parameter_tuning_1.py
Created February 7, 2020 17:57
Building a Sentiment Classifier using Scikit-Learn
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
# Bind X_train to the matrix named X_train_bigram_tf_idf.
# NOTE(review): X_train_bigram_tf_idf is not defined in this snippet —
# presumably the bigram TF-IDF training matrix built in an earlier step; confirm.
X_train = X_train_bigram_tf_idf
# Phase 1: loss, learning rate and initial learning rate
# Start from an SGDClassifier built with its default constructor arguments.
clf = SGDClassifier()
@lazuxd
lazuxd / 5.hyper_parameter_tuning_2.py
Last active February 7, 2020 18:15
Building a Sentiment Classifier using Scikit-Learn
# Phase 2: penalty and alpha
clf = SGDClassifier()
# Search space: the regularisation type is picked from a fixed list, while
# alpha is drawn from a uniform distribution parameterised by loc and scale.
distributions = {
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': uniform(loc=1e-6, scale=1e-4),
}
random_search_cv = RandomizedSearchCV(
@lazuxd
lazuxd / 6.saving_best_classifier.py
Created February 7, 2020 17:59
Building a Sentiment Classifier using Scikit-Learn
from os import makedirs

# Create the output directory directly from Python rather than through a
# shell `mkdir`: this does not depend on the shell's quoting rules, and
# exist_ok=True keeps re-runs from complaining about an existing directory.
makedirs('classifiers', exist_ok=True)
# Keep a handle on the best estimator found by the search and persist that
# same object with joblib.
sgd_classifier = random_search_cv.best_estimator_
dump(sgd_classifier, 'classifiers/sgd_classifier.joblib')
# To restore the saved model later:
# sgd_classifier = load('classifiers/sgd_classifier.joblib')
@lazuxd
lazuxd / 7.testing_model.py
Created February 7, 2020 17:59
Building a Sentiment Classifier using Scikit-Learn
# Push the test texts through the same two-stage pipeline (bigram
# vectorizer, then TF-IDF transformer); only transform() is called here,
# so neither object is refit on the test data.
test_counts = bigram_vectorizer.transform(imdb_test['text'].values)
X_test = bigram_tf_idf_transformer.transform(test_counts)
y_test = imdb_test['label'].values

# Score the saved classifier on the transformed test set and print the result.
score = sgd_classifier.score(X_test, y_test)
print(score)
@lazuxd
lazuxd / MLClassifier.py
Created February 26, 2020 21:03
Maximum Likelihood Classification
import numpy as np
class MLClassifier:
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
'''
x - numpy array of shape (n, d); n = #observations; d = #variables
y - numpy array of shape (n,)
'''
# no. of variables / dimension
self.d = x.shape[1]
@lazuxd
lazuxd / predict_heart_disease.py
Created February 26, 2020 21:15
Maximum Likelihood Classification
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the dataset from heart.csv (path relative to the working directory).
df = pd.read_csv('heart.csv')
# Columns 0-12 are used as the inputs and column 13 as the target; 80% of the
# rows go to the training split. No random_state is passed, so the split is
# not fixed between runs.
(x_train, x_test, y_train, y_test) = train_test_split(
df.iloc[:, 0:13].values, df.iloc[:, 13].values, train_size=0.8)
# Fit the classifier on the training split.
# NOTE(review): MLClassifier is neither defined nor imported in this snippet —
# confirm it is in scope before this runs.
mlc = MLClassifier()
mlc.fit(x_train, y_train)
@lazuxd
lazuxd / res_net.py
Created March 5, 2020 21:36
Building a ResNet in Keras
from tensorflow import Tensor
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization,\
Add, AveragePooling2D, Flatten, Dense
from tensorflow.keras.models import Model
def relu_bn(inputs: Tensor) -> Tensor:
    """Apply a ReLU activation followed by batch normalization.

    A fresh ``ReLU`` layer and a fresh ``BatchNormalization`` layer are
    created on every call and applied to ``inputs`` in that order; the
    output of the normalization layer is returned.
    """
    activated = ReLU()(inputs)
    return BatchNormalization()(activated)