import pandas as pd
#Load the real and fake news datasets
real = pd.read_csv("/content/drive/My Drive/News/True.csv")
fake = pd.read_csv("/content/drive/My Drive/News/Fake.csv")
#Shape of real news dataset
print("Real news: ",real.shape)
#Shape of fake news dataset
print("Fake News: ", fake.shape)
#Assign a label of 0 to all real news data and place it in the dataframe
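# Hedged completion of the labeling step (the original code is not shown):
# assume fake news gets the complementary label 1, and the two frames are
# combined into the single dataframe used by the later snippets.
real['label'] = 0
fake['label'] = 1
data = pd.concat([real, fake], ignore_index=True)
titles, labels = data['title'], data['label']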
#Compile the model with the Adam optimizer, binary cross-entropy loss, and an accuracy metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#Train the model for 5 epochs
model.fit(title_train, y_train, epochs=5)
#Evaluate the model on the test data
model.evaluate(title_test, y_test)
import tensorflow as tf
# Input Layers
input_layer = tf.keras.layers.Input(shape=(), dtype=tf.string, name='news')
# BERT layers
processed = bert_preprocess(input_layer)
output = bert_encoder(processed)
# Fully Connected Layers
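# A hedged sketch of the fully connected head, assuming a dropout layer and
# a sigmoid output on BERT's pooled output (layer names are illustrative).
l = tf.keras.layers.Dropout(0.1, name='dropout')(output['pooled_output'])
l = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(l)
model = tf.keras.Model(inputs=[input_layer], outputs=[l])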
!pip install tensorflow-text
import tensorflow_hub as hub
import tensorflow_text as text
#Use the BERT preprocessor and BERT encoder from tensorflow_hub
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer('https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4')
#Sequential model with a 50-unit LSTM layer followed by Dense layers
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(50))
model.add(tf.keras.layers.Dense(20,activation='relu'))
model.add(tf.keras.layers.Dense(5,activation='relu'))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))
import numpy as np
#Convert each of the testing data series to a Word2Vec embedding
test = []
for i in title_test:
    temp = np.array(embed(i))
    test.append(temp)
#Pad the sequences to account for titles of different lengths in the test data
test = tf.keras.preprocessing.sequence.pad_sequences(test, dtype='float')
from sklearn.model_selection import train_test_split
#Split data into training and testing dataset
title_train, title_test, y_train, y_test = train_test_split(titles, labels, test_size=0.2, random_state=1000)
import tensorflow_hub as hub
import tensorflow as tf
embed = hub.load("https://tfhub.dev/google/Wiki-words-250/2")
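# Illustrative sanity check (an assumption, not from the original): the
# Wiki-words-250 module maps each word to a 250-dimensional vector.
print(np.array(embed(["breaking", "news"])).shape)  # -> (2, 250)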
#Convert each series of words in the training data to a Word2Vec embedding
indiv = []
for i in title_train:
    temp = np.array(embed(i))
    indiv.append(temp)
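# Hedged completion, mirroring the test-set code above: pad the training
# embeddings so every title has the same sequence length.
train = tf.keras.preprocessing.sequence.pad_sequences(indiv, dtype='float')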
#Import nltk preprocessing library to convert text into a readable format
import nltk
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
import string
#Convert all titles to lowercase
data['title'] = data['title'].str.lower()
data.head()
#Collect all necessary punctuation marks in one list
#Extend with curly quotes (assumed intent; straight ' and " are already in string.punctuation)
punc = list(string.punctuation) + ['‘', '’', '“', '”']
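# A hedged sketch of the remaining preprocessing (not shown above): strip
# punctuation, tokenize, drop stopwords, and lemmatize each title. The
# helper name `preprocess` is illustrative, not from the original.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(title):
    title = ''.join(ch for ch in title if ch not in punc)  # remove punctuation
    words = nltk.word_tokenize(title)                      # tokenize
    words = [w for w in words if w not in stop_words]      # drop stopwords
    return [lemmatizer.lemmatize(w) for w in words]        # lemmatize

data['title'] = data['title'].apply(preprocess)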