Skip to content

Instantly share code, notes, and snippets.

@seanbenhur
Created May 15, 2021 12:10
Show Gist options
  • Save seanbenhur/132961975de8137e35d35b12308eeb88 to your computer and use it in GitHub Desktop.
Save seanbenhur/132961975de8137e35d35b12308eeb88 to your computer and use it in GitHub Desktop.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from collections import Counter
# Newsgroups used for this experiment; only these four categories are loaded.
categories = [
    "alt.atheism",
    "talk.religion.misc",
    "comp.graphics",
    "sci.space",
]

# Fetch the predefined train and test subsets (in that order), both
# restricted to the categories above.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=subset_name, categories=categories)
    for subset_name in ("train", "test")
)

# Split each subset into model inputs (`.data`) and labels (`.target`).
X_train, y_train = newsgroups_train.data, newsgroups_train.target
X_test, y_test = newsgroups_test.data, newsgroups_test.target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment