This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BilinearSimilarity(tf.keras.Model): | |
"""
Computes the bilinear similarity term used for answer-span prediction.

Reference: https://github.com/kellywzhang/reading-comprehension/blob/master/attention.py
"""
def __init__(self,hidden_size): | |
# hidden_size: presumably the width of the bilinear weight matrix created
# below — NOTE(review): the rest of __init__ (and the call method) is
# truncated in this view, so the exact use of hidden_size cannot be confirmed.
super(BilinearSimilarity,self).__init__() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Attention(tf.keras.Model): | |
"""
Weighted-sum attention layer.

Scores are produced by a single Dense projection (self.W); the scoring and
weighted-sum logic (the call method) is not visible in this view.
"""
def __init__(self, units = 1): | |
super(Attention, self).__init__() | |
# Dense layer that maps each timestep's features to `units` attention
# score(s); default units=1 gives one scalar score per timestep.
self.W = tf.keras.layers.Dense(units) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the train/dev JSON (SQuAD-style layout) into parallel lists so they
# can later be written out as a CSV. | |
# Each list index i corresponds to one question/answer record. | |
contexts = [] | |
questions = [] | |
answers_text = [] | |
answers_start = [] | |
title = [] | |
# `train` appears to be a DataFrame whose column 0 holds the parsed JSON
# objects — NOTE(review): confirm against where `train` is loaded.
for i in range(train.shape[0]): | |
# 'paragraphs' holds the context/QA groups for one article topic.
topic = train.iloc[i,0]['paragraphs'] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def countOccurences(word, context_window):
    """Return how many times `word` occurs in `context_window`.

    Delegates to the container's built-in ``count``: for a list this counts
    equal elements, for a string it counts non-overlapping substrings.
    See https://stackoverflow.com/a/41663359/9371069
    """
    occurrences = context_window.count(word)
    return occurrences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stacked-LSTM classifier built with the Keras Sequential API.
# batch_size is left unspecified (None), so the model accepts input of
# shape (None, timesteps, input_dim).
model = Sequential()
# First LSTM must declare input_shape; return_sequences=True emits the full
# (timesteps, 32) sequence so the next LSTM can consume it.
model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, input_dim)))
# Fix: the original passed input_shape to this layer too, but Keras ignores
# input_shape on any non-first layer (it is inferred from the layer above),
# so the redundant, misleading argument is removed — behavior is unchanged.
model.add(LSTM(16))
model.add(Dropout(0.5))  # regularization between recurrent stack and output
# Per-class sigmoid output — NOTE(review): sigmoid implies independent
# (multi-label) classes; for mutually exclusive classes softmax is the usual
# choice. Confirm the intended task before changing.
model.add(Dense(n_classes, activation='sigmoid'))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates RepeatVector: tiles a 2-D activation along a new time axis.
# Built with Sequential's list-of-layers constructor (equivalent to
# successive model.add(...) calls).
model = Sequential([
    Dense(32, input_dim=32),  # output shape: (None, 32); None is the batch dim
    RepeatVector(3),          # output shape: (None, 3, 32)
])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo of the two ways to declare an LSTM's input specification. | |
model = Sequential() | |
# When the batch size is not known: input_shape=(timesteps, features),
# i.e. batch dimension left as None. | |
model.add(LSTM(5,input_shape = (128,1))) | |
# When the batch size is fixed: batch_input_shape=(batch, timesteps, features).
# NOTE(review): as written, both adds go into the SAME model, stacking two
# LSTMs — and Keras ignores batch_input_shape on a non-first layer. These two
# lines are presumably meant as either/or alternatives, not a real stack. | |
model.add(LSTM(5,batch_input_shape=(16,128,1))) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Greedy caption decoding: the 'startseq' token kick-starts generation, then
# the model predicts one word at a time, appending each to the growing text. | |
in_text = 'startseq' | |
# At most MAX_LENGTH decoding steps — NOTE(review): no 'endseq' early-exit is
# visible in this view; confirm the loop breaks on the end token further down.
for i in range(MAX_LENGTH): | |
# Encode the caption-so-far as word indices, silently dropping OOV words.
sequence = [word_to_ix[w] for w in in_text.split() if w in word_to_ix] | |
# Left-pad to the fixed model input length (pad_sequences default padding).
sequence = pad_sequences([sequence], maxlen=MAX_LENGTH) | |
# Model takes (image feature, partial caption) and returns a vocab distribution.
yhat = model.predict([photo,sequence], verbose=0) | |
# Greedy choice: take the single highest-probability word index. | |
yhat = np.argmax(yhat) | |
word = ix_to_word[yhat] | |
in_text += ' ' + word |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Batch generator intended for model.fit_generator(): yields training batches
# indefinitely instead of materializing the whole dataset in memory. | |
def data_generator(descriptions, photos, MAX_LENGTH,VOCAB_SIZE, num_photos_per_batch): | |
# X1 = photo features, X2 = partial caption sequences, y = next-word targets.
X1, X2, y = list(), list(), list() | |
# n counts photos accumulated toward num_photos_per_batch.
n=0 | |
# Loop forever over the images, as fit_generator expects an endless generator. | |
while 1: | |
# NOTE(review): iterates the GLOBAL train_descriptions, not the
# `descriptions` parameter — the parameter appears unused in this view;
# the rest of the body (and use of photos/MAX_LENGTH/VOCAB_SIZE) is
# truncated here, so confirm before changing.
for key, desc_list in train_descriptions.items(): | |
n+=1 | |
# retrieve the photo feature (body continues beyond this view) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build (partial-sequence -> next-word) training pairs for every caption of
# every training image. | |
for key, desc_list in train_descriptions.items(): | |
# n presumably counts processed photos — defined outside this view.
n+=1 | |
# Look up the precomputed feature vector for this image. | |
photo = photos[key] | |
for desc in desc_list: | |
# Tokenize one caption; texts_to_sequences returns a list of lists,
# so take element 0 for the single caption.
seq = token.texts_to_sequences([desc]) | |
seq = seq[0] | |
# For each split point i: input = words before i, target = word at i.
for i in range(1,len(seq)): | |
# (loop body continues beyond this view)
in_seq , op_seq = seq[:i],seq[i] |
NewerOlder