Skip to content

Instantly share code, notes, and snippets.

View seanbenhur's full-sized avatar
🚀
Teaching machines to learn!!

Sean Benhur seanbenhur

🚀
Teaching machines to learn!!
View GitHub Profile
from datasets import load_dataset
from transformers import AutoTokenizer
#load the dataset
dataset = load_dataset("imdb")
#create tokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
def encode_batch(batch):
"""Encodes a batch of input data using the model tokenizer."""
from reg_resampler import resampler
# Initialize the resampler object
rs = resampler()
# You might receive info about class merger for low-sample classes
# Generate classes
Y_classes = rs.fit(train, target=target, bins=num_bins)
# Create the actual target variable
Y = df_train[target]
# Print the number of samples in each class before applying SMOTE
print('majority class: %d' % np.sum(y == 0))
print('minority class: %d' % np.sum(y == 1))
#majority class: 100
#minority class: 50
# Oversampling is carried out by instantiating any oversampler implemented in the package and calling its sample function.
oversampler= sv.distance_SMOTE()
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np
import matplotlib.pyplot as plt
import smote_variants as sv
import imbalanced_databases as imbd
# loading the dataset
dataset= imbd.load_iris0()
features, target= dataset['data'], dataset['target']
model = make_pipeline_imb(TfidfVectorizer(), RandomUnderSampler(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))
# precision recall f1-score support
#
# 0 0.73 0.87 0.79 319
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test,y_pred))
#precision recall f1-score support
# 0 0.67 0.94 0.79 319
# 1 0.96 0.92 0.94 389
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from collections import Counter
categories = [
@seanbenhur
seanbenhur / scripts.ipynb
Last active April 22, 2021 04:12
Scripts .ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@seanbenhur
seanbenhur / fire-better-lstm-torchtext.ipynb
Created April 12, 2021 11:29
FIRE Better-LSTM Torchtext.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.