Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created December 20, 2020 10:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/a5833f193f5e0b009b408c76bd7a345a to your computer and use it in GitHub Desktop.
Save amankharwal/a5833f193f5e0b009b408c76bd7a345a to your computer and use it in GitHub Desktop.
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku
# Set seeds for reproducibility.
import tensorflow as tf
from numpy.random import seed
# TensorFlow 2.x removed the top-level `set_random_seed` and exposes the same
# functionality as `tf.random.set_seed`; fall back to the 1.x name when the
# 2.x attribute is missing. The name `set_random_seed` stays bound either way.
try:
    set_random_seed = tf.random.set_seed  # TensorFlow 2.x
except AttributeError:
    set_random_seed = tf.set_random_seed  # TensorFlow 1.x
set_random_seed(2)
seed(1)
import pandas as pd
import numpy as np
import string, os
import warnings
# Silence every warning category for the rest of the run, then add an explicit
# ignore entry for FutureWarning on top (already covered by the blanket rule).
warnings.simplefilter("ignore")
warnings.filterwarnings(action="ignore", category=FutureWarning)
# Folder containing the article CSV files. The value below is a placeholder;
# replace it with the real dataset path before running.
curr_dir = 'dataset directory'
all_headlines = []
for filename in os.listdir(curr_dir):
    if 'Articles' in filename:
        # os.path.join inserts the path separator that plain string
        # concatenation drops when curr_dir has no trailing slash.
        article_df = pd.read_csv(os.path.join(curr_dir, filename))
        all_headlines.extend(article_df.headline.values)
        # Stop after the first matching file: only one CSV is loaded.
        break
# Discard entries whose headline is the literal string "Unknown".
all_headlines = [h for h in all_headlines if h != "Unknown"]
# Bare expression: shows the headline count when run as a notebook cell.
len(all_headlines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment