Aravind Pai aravindpai

## readfile.py
# Load the reviews CSV into a DataFrame; nrows=100000 limits the read to the
# first 100,000 data rows of the file.
data=pd.read_csv("../input/amazon-fine-food-reviews/Reviews.csv",nrows=100000)

## attention.py
from attention import AttentionLayer

## libraries.py
import numpy as np
import pandas as pd
import re
from bs4 import BeautifulSoup
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, Bidirectional
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

## duplicates.py
# Both calls mutate `data` in place (inplace=True) rather than returning a new frame.
# Rows are considered duplicates when their 'Text' value matches; other columns are ignored.
data.drop_duplicates(subset=['Text'],inplace=True)  #dropping duplicates
# axis=0 drops rows (not columns) that contain missing values.
data.dropna(axis=0,inplace=True)   #dropping na

## info.py
# Prints a summary of the frame after the cleaning steps; nothing is assigned.
data.info()       #information about the dataset

## contraction.py
contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

                           "didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

                           "he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

                           "I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

                           "i'd've": "i would have", "i'll": "i will",  "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",

## text.py
# Preview the first 10 entries of the 'Text' column. This is a bare expression:
# its value is only shown in an interactive/notebook session and is otherwise discarded.
data['Text'][:10]

## textcleaning.py
# English stop-word list from NLTK, stored as a set so the membership test in
# text_cleaner is a hash lookup rather than a list scan.
stop_words = set(stopwords.words('english'))
def text_cleaner(text):
    """Run one review string through a fixed sequence of cleaning passes.

    In order, the visible steps: lower-case the text, parse it as HTML and
    keep only the text content, delete parenthesised spans, delete double
    quotes, expand contractions via ``contraction_mapping``, delete ``'s``
    suffixes, replace every character that is not an ASCII letter with a
    space, and split into tokens while dropping entries of ``stop_words``.

    Args:
        text: The raw string to clean. It must support ``.lower()``.

    NOTE(review): the visible body ends right after ``tokens`` is built and
    contains no ``return`` statement, so as shown the function returns
    ``None`` and ``tokens`` is unused. This looks like a truncated snippet;
    confirm against the complete source before relying on the result.
    """
    # Lower-casing happens first, so any capitalised keys in
    # contraction_mapping (e.g. "I'd") can never match below.
    newString = text.lower()
    # Strip markup: keep only the text nodes of the parsed document.
    newString = BeautifulSoup(newString, "lxml").text
    # Remove "(...)" spans, including the parentheses themselves.
    newString = re.sub(r'\([^)]*\)', '', newString)
    # Remove double-quote characters.
    newString = re.sub('"','', newString)
    # Expand contractions token by token. Tokens come from splitting on a
    # single space, so only tokens that equal a mapping key exactly are replaced.
    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])
    # Drop any remaining 's that ends at a word boundary (e.g. possessives).
    newString = re.sub(r"'s\b","",newString)
    # Replace every non-letter (digits, punctuation, whitespace) with a space.
    newString = re.sub("[^a-zA-Z]", " ", newString)
    # Split on runs of whitespace and discard stop words.
    tokens = [w for w in newString.split() if not w in stop_words]

## readsummary.py
# Preview the first 10 entries of the 'Summary' column. This is a bare expression:
# its value is only shown in an interactive/notebook session and is otherwise discarded.
data['Summary'][:10]

## summarycleaning.py
def summary_cleaner(text):
    newString = re.sub('"','', text)
    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])
    newString = re.sub(r"'s\b","",newString)
    newString = re.sub("[^a-zA-Z]", " ", newString)
    newString = newString.lower()
    tokens=newString.split()
    newString=''
    for i in tokens:
        if len(i)>1:
	import numpy as np
	import pandas as pd
	import re
	from bs4 import BeautifulSoup
	from keras.preprocessing.text import Tokenizer
	from keras.preprocessing.sequence import pad_sequences
	from nltk.corpus import stopwords
	from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, Bidirectional
	from tensorflow.keras.models import Model
	from tensorflow.keras.callbacks import EarlyStopping
	data.drop_duplicates(subset=['Text'],inplace=True) #dropping duplicates
	data.dropna(axis=0,inplace=True) #dropping na
	contraction_mapping = {"ain't": "is not", "aren't": "are not","can't": "cannot", "'cause": "because", "could've": "could have", "couldn't": "could not",

	"didn't": "did not", "doesn't": "does not", "don't": "do not", "hadn't": "had not", "hasn't": "has not", "haven't": "have not",

	"he'd": "he would","he'll": "he will", "he's": "he is", "how'd": "how did", "how'd'y": "how do you", "how'll": "how will", "how's": "how is",

	"I'd": "I would", "I'd've": "I would have", "I'll": "I will", "I'll've": "I will have","I'm": "I am", "I've": "I have", "i'd": "i would",

	"i'd've": "i would have", "i'll": "i will", "i'll've": "i will have","i'm": "i am", "i've": "i have", "isn't": "is not", "it'd": "it would",
	stop_words = set(stopwords.words('english'))
	def text_cleaner(text):
	newString = text.lower()
	newString = BeautifulSoup(newString, "lxml").text
	newString = re.sub(r'\([^)]*\)', '', newString)
	newString = re.sub('"','', newString)
	newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])
	newString = re.sub(r"'s\b","",newString)
	newString = re.sub("[^a-zA-Z]", " ", newString)
	tokens = [w for w in newString.split() if not w in stop_words]
	def summary_cleaner(text):
	newString = re.sub('"','', text)
	newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])
	newString = re.sub(r"'s\b","",newString)
	newString = re.sub("[^a-zA-Z]", " ", newString)
	newString = newString.lower()
	tokens=newString.split()
	newString=''
	for i in tokens:
	if len(i)>1: