Skip to content

Instantly share code, notes, and snippets.

View shubham-singh-ss's full-sized avatar

Shubham Singh shubham-singh-ss

View GitHub Profile
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features, with the vocabulary capped at 1500 terms.
cv = CountVectorizer(max_features=1500)
# The vectorizer is fitted on `corpus` alone.  The previous call also passed
# `corpus1` as a second positional argument, but that slot is `y`, which
# CountVectorizer ignores, so `corpus1` never contributed any features.
# NOTE(review): if `corpus1` is meant to be part of the input text, merge it
# into `corpus` per document before this call -- confirm the intent.
X = cv.fit_transform(corpus).toarray()
# Target values: the column at position 3 of df_new.
y = df_new.iloc[:, 3].values
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)
from sklearn.ensemble import RandomForestClassifier

# 1000 trees split on entropy.  random_state is fixed to match the seeded
# train/test split, so repeated runs give the same model and the same score;
# without it every run grows a different forest.
classifier = RandomForestClassifier(
    n_estimators=1000,
    criterion='entropy',
    random_state=0,
)
classifier.fit(X_train, y_train)
# Predicted labels for the held-out rows.
y_pred = classifier.predict(X_test)
# Mean accuracy on the held-out rows (shown as cell output in a notebook).
classifier.score(X_test, y_test)
from sklearn.metrics import confusion_matrix

# Tabulate true labels against predicted labels for the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm
import re
import nltk
# Download the NLTK 'stopwords' resource so nltk.corpus.stopwords can be loaded.
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Start a Chrome browser session (presumably Selenium's `webdriver`; its import is not visible here).
driver = webdriver.Chrome()
# Placeholder URL: replace "YOUR_LINK_HERE" with the page to load before running.
driver.get("YOUR_LINK_HERE")
# Start with an empty DataFrame that has the four named columns.
df = pd.DataFrame(
    columns=[
        'link',
        'title',
        'description',
        'category',
    ],
)
# Print the text report comparing true and predicted labels on the test set.
print(classification_report(y_true=y_test, y_pred=y_pred))
# IPython/Jupyter shell escape (`!`): runs `git clone` for the openai/gpt-2 repository; not valid in a plain .py script.
!git clone https://github.com/openai/gpt-2.git