Shubham Singh shubham-singh-ss

## bag_of_words.py
# Bag-of-words features: turn the text corpus into a dense matrix of token
# counts and pull the label column out of the dataframe.
from sklearn.feature_extraction.text import CountVectorizer

# Keep only the 1500 most frequent terms in the vocabulary.
cv = CountVectorizer(max_features=1500)

# CountVectorizer.fit_transform(raw_documents, y=None) ignores its second
# argument, so the `corpus1` that used to be passed here never influenced X
# and has been dropped from the call.
# NOTE(review): if `corpus1` holds additional documents that were meant to be
# vectorized too, combine them with `corpus` before this call and confirm that
# `y` still has exactly one label per row of X.
X = cv.fit_transform(corpus).toarray()

# Labels are read from the fourth column (position 3) of df_new.
y = df_new.iloc[:, 3].values

## split.py
# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition
# identical from one run to the next.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

## model.py
# Train a random forest of 1000 trees that uses entropy as its split criterion.
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): no random_state is passed, so the fitted forest (and the scores
# derived from it) can differ between runs even though the split is seeded.
classifier = RandomForestClassifier(criterion='entropy', n_estimators=1000)
classifier.fit(X=X_train, y=y_train)

## result.py
# Predict labels for the held-out rows.
y_pred = classifier.predict(X=X_test)
# Mean accuracy on the held-out rows. This is a bare expression: the value is
# only shown when evaluated interactively (e.g. last line of a notebook cell)
# and is not stored anywhere.
classifier.score(X=X_test, y=y_test)

## confusion_matrix.py
# Tabulate true labels against predicted labels for the test set.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
# Bare expression so the matrix is echoed when run interactively.
cm

## cleaning.py
# Text-cleaning prerequisites: regex support, the NLTK stop-word corpus and
# the Porter stemmer.
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Fetch the stop-word list so `stopwords` can be read later; NLTK loads the
# corpus lazily on first access, so downloading after the import is fine.
nltk.download('stopwords')

## driver.py
# Start a Chrome browser session and load the target page.
# NOTE(review): `webdriver` is not imported in this snippet — presumably
# `from selenium import webdriver` runs earlier; confirm.
driver = webdriver.Chrome()
# "YOUR_LINK_HERE" is a placeholder; substitute the real URL before running.
driver.get("YOUR_LINK_HERE")

## df.py
# Empty frame that fixes the column layout up front; it starts with no rows.
# NOTE(review): `pd` is not imported in this snippet — presumably
# `import pandas as pd` runs earlier; confirm.
df = pd.DataFrame(
    columns=[
        'link',
        'title',
        'description',
        'category',
    ],
)

## analyse.py
 # Print the text report of per-class precision, recall, F1 and support for
 # the test-set predictions.
 # `classification_report` is imported here because no other snippet in this
 # file brings it into scope; the sibling sklearn snippets likewise import
 # what they need at the point of use.
 # NOTE(review): these lines keep the snippet's original one-space indent;
 # strip it if this is run as a plain script rather than a notebook cell.
 from sklearn.metrics import classification_report
 print(classification_report(y_test, y_pred))

## clone.py
# IPython/Jupyter shell escape (not plain Python): runs `git clone` to fetch
# the openai/gpt-2 repository into the current working directory.
!git clone https://github.com/openai/gpt-2.git
	# NOTE(review): the tab-indented lines below mirror the bag_of_words, split,
	# model, result, confusion_matrix and cleaning snippets that appear earlier
	# in this file. They sit indented under a shell-escape line, which plain
	# Python would reject; this looks like a paste/export artifact — confirm
	# whether this copy is needed at all.

	# Bag-of-words features capped at 1500 vocabulary terms.
	from sklearn.feature_extraction.text import CountVectorizer
	cv = CountVectorizer(max_features = 1500)
	# NOTE(review): fit_transform's second positional parameter is `y`, which
	# CountVectorizer ignores, so `corpus1` has no effect on X here.
	X = cv.fit_transform(corpus, corpus1).toarray()
	# Labels are read from the fourth column (position 3) of df_new.
	y = df_new.iloc[:, 3].values
	# 80/20 train/test split with a fixed seed.
	from sklearn.model_selection import train_test_split
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
	# 1000-tree random forest using the entropy criterion; no random_state is set.
	from sklearn.ensemble import RandomForestClassifier
	classifier = RandomForestClassifier(n_estimators = 1000, criterion = 'entropy')
	classifier.fit(X_train, y_train)
	# Predictions and mean accuracy on the held-out rows (the score is not stored).
	y_pred = classifier.predict(X_test)
	classifier.score(X_test, y_test)
	# Confusion matrix of true vs. predicted labels; the bare `cm` only echoes
	# the matrix when run interactively.
	from sklearn.metrics import confusion_matrix
	cm = confusion_matrix(y_test, y_pred)
	cm
	# Text-cleaning imports and stop-word download. Nothing in the lines above
	# uses them, so their position after training is not significant here.
	import re
	import nltk
	nltk.download('stopwords')
	from nltk.corpus import stopwords
	from nltk.stem.porter import PorterStemmer