This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
from os import system, listdir | |
from os.path import isfile, join | |
from random import shuffle | |
# Download the aclImdb_v1 archive and extract it in the working directory.
# os.system returns the command's exit status; stop on a non-zero status so a
# failed download is not silently followed by a failing extraction.
if system('wget "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"') != 0:
    raise RuntimeError('failed to download aclImdb_v1.tar.gz')
if system('tar -xzf "aclImdb_v1.tar.gz"') != 0:
    raise RuntimeError('failed to extract aclImdb_v1.tar.gz')
def create_data_frame(folder: str) -> pd.DataFrame: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer | |
from joblib import dump, load # used for saving and loading sklearn objects | |
from scipy.sparse import save_npz, load_npz # used for saving and loading sparse matrices | |
# Create the `data_preprocessors` and `vectorized_data` directories in the
# current working directory.  makedirs(exist_ok=True) needs no shell, is
# portable, and does not fail when the script is run a second time.
from os import makedirs

makedirs('data_preprocessors', exist_ok=True)
makedirs('vectorized_data', exist_ok=True)
# Unigram Counts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import SGDClassifier | |
from sklearn.model_selection import train_test_split | |
from scipy.sparse import csr_matrix | |
import numpy as np | |
def train_and_show_scores(X: csr_matrix, y: np.array, title: str) -> None: | |
X_train, X_valid, y_train, y_valid = train_test_split( | |
X, y, train_size=0.75, stratify=y | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import RandomizedSearchCV | |
from scipy.stats import uniform | |
# Use the bigram TF-IDF matrix as the training input for the search.
X_train = X_train_bigram_tf_idf

# Phase 1: loss, learning rate and initial learning rate
clf = SGDClassifier()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Phase 2: penalty and alpha
clf = SGDClassifier()

# Candidate values for the search.  scipy.stats.uniform(loc, scale) is the
# uniform distribution on [loc, loc + scale], so alpha ranges over
# [1e-6, 1.01e-4].
distributions = {
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': uniform(loc=1e-6, scale=1e-4),
}
random_search_cv = RandomizedSearchCV( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Save the best estimator found by the randomized search under classifiers/.
# makedirs(exist_ok=True) replaces the shell `mkdir`, so re-running the
# script does not fail when the directory already exists.
from os import makedirs

makedirs('classifiers', exist_ok=True)
sgd_classifier = random_search_cv.best_estimator_
dump(sgd_classifier, 'classifiers/sgd_classifier.joblib')
# To reload later: sgd_classifier = load('classifiers/sgd_classifier.joblib')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the test matrix with the same two steps applied in order: the bigram
# vectorizer, then the TF-IDF transformer.  Only `transform` is called here,
# so neither object is refitted on the test data.
X_test = bigram_vectorizer.transform(imdb_test['text'].values)
X_test = bigram_tf_idf_transformer.transform(X_test)
y_test = imdb_test['label'].values

# Score the saved classifier on the test set and print the result.
score = sgd_classifier.score(X_test, y_test)
print(score)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class MLClassifier: | |
def fit(self, x: np.ndarray, y: np.ndarray) -> None: | |
''' | |
x - numpy array of shape (n, d); n = #observations; d = #variables | |
y - numpy array of shape (n,) | |
''' | |
# no. of variables / dimension | |
self.d = x.shape[1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
# Load heart.csv; columns 0-12 are passed as the inputs and column 13 as the
# target.  80% of the rows go to the training split.
df = pd.read_csv('heart.csv')
(x_train, x_test, y_train, y_test) = train_test_split(
    df.iloc[:, 0:13].values,
    df.iloc[:, 13].values,
    train_size=0.8,
)

# Fit the classifier on the training split.
mlc = MLClassifier()
mlc.fit(x_train, y_train)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow import Tensor | |
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization,\ | |
Add, AveragePooling2D, Flatten, Dense | |
from tensorflow.keras.models import Model | |
def relu_bn(inputs: Tensor) -> Tensor:
    """Apply a ReLU layer followed by a BatchNormalization layer.

    Args:
        inputs: Tensor passed to a new ``ReLU`` layer.

    Returns:
        The output of a new ``BatchNormalization`` layer applied to the
        ReLU output.
    """
    activated = ReLU()(inputs)
    return BatchNormalization()(activated)
OlderNewer