import re

def processTweet(chat):
    # Lower-case, then strip URLs, @mentions, extra whitespace,
    # hashtag signs (keeping the tag text) and stray punctuation.
    chat = chat.lower()
    chat = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', chat)
    chat = re.sub(r'@[^\s]+', '', chat)
    chat = re.sub(r'[\s]+', ' ', chat)
    chat = re.sub(r'#([^\s]+)', r'\1', chat)
    chat = re.sub(r'[\.!:\?\-\'\"\\/]', '', chat)
    chat = chat.strip('\'"')
    return chat
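
A quick check of the cleaner on a made-up tweet (expected output in the comment):

print(processTweet("Check this out! https://example.com @friend #DeepLearning"))
# -> 'check this out deeplearning'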
for _ in range(10):
    try:
        states_values = enc_model.predict(str_to_tokens(input('Enter question : ')))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
        stop_condition = False
        decoded_translation = ''
        while not stop_condition:
            dec_outputs, h, c = dec_model.predict([empty_target_seq] + states_values)
            # greedy decoding: pick the most likely next word
            sampled_word_index = np.argmax(dec_outputs[0, -1, :])
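            # the capture cuts off here; a plausible completion of the
            # standard greedy seq2seq decode loop
            word = tokenizer.index_word.get(sampled_word_index, '')
            if word == 'end' or len(decoded_translation.split()) > maxlen_answers:
                stop_condition = True  # stop on the end tag or at max length
            else:
                decoded_translation += ' ' + word
            # feed the sampled token and the fresh states back in
            empty_target_seq = np.zeros((1, 1))
            empty_target_seq[0, 0] = sampled_word_index
            states_values = [h, c]
        print(decoded_translation)
    except KeyError:
        # str_to_tokens raises KeyError on out-of-vocabulary words
        print('Sorry, that question contains unknown words.')

# str_to_tokens is called above but missing from this capture; a minimal
# sketch, assuming the tokenizer and question length from the data prep step:
def str_to_tokens(sentence):
    tokens = [tokenizer.word_index[w] for w in sentence.lower().split()]
    return preprocessing.sequence.pad_sequences(
        [tokens], maxlen=maxlen_questions, padding='post')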
def make_inference_models():
    # encoder model: maps a tokenised question to the final LSTM states
    encoder_model = tf.keras.models.Model(encoder_inputs, encoder_states)
    # the decoder is rewired to take the previous step's states as inputs,
    # so it can be stepped one token at a time during inference
    decoder_state_input_h = tf.keras.layers.Input(shape=(200,))
    decoder_state_input_c = tf.keras.layers.Input(shape=(200,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    # decoder_lstm / decoder_embedding come from the training graph,
    # which is not part of this capture
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_embedding, initial_state=decoder_states_inputs)
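    # the capture ends here; a plausible completion following the usual
    # seq2seq inference pattern (decoder_dense / decoder_inputs are also
    # assumed from the training graph)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = tf.keras.models.Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs, state_h, state_c])
    return encoder_model, decoder_model

# the decode loop above consumes the pair as enc_model / dec_model
enc_model, dec_model = make_inference_models()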
# build the training arrays from the raw question/answer lists
Prepared_data = prepare_data(questions_for_token, answers_for_token)
encoder_input_data = Prepared_data[0]
decoder_input_data = Prepared_data[1]
decoder_output_data = Prepared_data[2]
maxlen_answers = Prepared_data[3]
nb_words = Prepared_data[4]
word_index = Prepared_data[5]
tokenizer = Prepared_data[6]

# GloVe-initialised embedding matrix for the vocabulary
embedding_matrix = emb_mat(nb_words)

# the encoder takes a variable-length sequence of word indices
encoder_inputs = tf.keras.layers.Input(shape=(None,))
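
The capture stops at the encoder input layer. A minimal sketch of the usual continuation, assuming 100-d GloVe vectors (matching emb_mat below) and the 200-unit LSTM implied by the inference state shapes:

encoder_embedding = tf.keras.layers.Embedding(
    nb_words, 100, weights=[embedding_matrix], trainable=False)(encoder_inputs)
encoder_outputs, state_h, state_c = tf.keras.layers.LSTM(
    200, return_state=True)(encoder_embedding)
encoder_states = [state_h, state_c]  # consumed by make_inference_models()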
def prepare_data(questions, answers):
    answers = pd.DataFrame(answers, columns=["Ans"])
    questions = pd.DataFrame(questions, columns=["Question"])
    # getFeatureVector (defined elsewhere in the gist) cleans and
    # tokenises each question
    questions["TokQues"] = questions["Question"].apply(getFeatureVector)
    answers = np.array(answers["Ans"])
    questions = np.array(questions["TokQues"])
    # wrap every answer in start/end markers; the decode loop above looks
    # up 'start' in the word index and stops on 'end'
    answers_with_tags = list()
    for i in range(len(answers)):
        answers_with_tags.append('start ' + str(answers[i]) + ' end')
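    # the capture ends here; a plausible completion that yields the 7-tuple
    # unpacked above as Prepared_data[0..6] (assumes tokenized_data, below,
    # returns the four sequence arrays)
    tokenizer = preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(list(questions) + answers_with_tags)
    nb_words = len(tokenizer.word_index) + 1
    word_index = tokenizer.word_index
    (encoder_input_data, decoder_input_data,
     decoder_output_data, maxlen_answers) = tokenized_data(
        questions, answers_with_tags, nb_words, tokenizer)
    return (encoder_input_data, decoder_input_data, decoder_output_data,
            maxlen_answers, nb_words, word_index, tokenizer)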
def tokenized_data(questions, answers, VOCAB_SIZE, tokenizer):
    import numpy as np
    from tensorflow.keras import preprocessing

    # encoder_input_data: tokenise the questions and pad them all to the
    # length of the longest question
    tokenized_questions = tokenizer.texts_to_sequences(questions)
    maxlen_questions = max([len(x) for x in tokenized_questions])
    padded_questions = preprocessing.sequence.pad_sequences(
        tokenized_questions, maxlen=maxlen_questions, padding='post')
    encoder_input_data = np.array(padded_questions)
    # print( encoder_input_data.shape , maxlen_questions )

    # decoder_input_data
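    # truncated in the capture; the decoder side presumably mirrors the
    # encoder: tokenise the tagged answers and pad to their max length
    tokenized_answers = tokenizer.texts_to_sequences(answers)
    maxlen_answers = max([len(x) for x in tokenized_answers])
    padded_answers = preprocessing.sequence.pad_sequences(
        tokenized_answers, maxlen=maxlen_answers, padding='post')
    decoder_input_data = np.array(padded_answers)

    # decoder_output_data: the same sequences shifted left by one step,
    # so the decoder is trained to predict the next word
    shifted_answers = [seq[1:] for seq in tokenized_answers]
    decoder_output_data = np.array(preprocessing.sequence.pad_sequences(
        shifted_answers, maxlen=maxlen_answers, padding='post'))
    return encoder_input_data, decoder_input_data, decoder_output_data, maxlen_answers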
def emb_mat(nb_words):
    EMBEDDING_FILE = "glove.6B.100d.txt"  # 100-d GloVe vectors

    def get_coefs(word, *arr):
        return word, np.asarray(arr, dtype='float32')

    # word -> vector lookup for every token in the GloVe file
    embeddings_index = dict(get_coefs(*o.strip().split())
                            for o in open(EMBEDDING_FILE, encoding="utf8"))
    all_embs = np.stack(list(embeddings_index.values()))
    emb_mean, emb_std = all_embs.mean(), all_embs.std()
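    # the capture stops here; a plausible completion (the common pattern:
    # initialise rows to the GloVe distribution, then copy in pretrained
    # vectors; word_index is the global built by prepare_data)
    embed_size = all_embs.shape[1]
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
    for word, i in word_index.items():
        if i < nb_words:
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
    return embedding_matrix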
# chunks.py
questions_for_token = list()
answers_for_token = list()

# docs holds the raw corpus lines, one "question<TAB>answer" pair per line
c = 1
for con in docs:
    if c == 2868:
        pass  # skip record 2868 (presumably a malformed line)
    else:
        con = con.strip().split("\t")
        questions_for_token.append(con[0])
        answers_for_token.append(con[1])
    c = c + 1  # the counter must advance or the skip above never triggers
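
docs itself is never defined in this capture; a minimal sketch of how it is presumably read, with the file name purely hypothetical:

with open("dialogs.txt", encoding="utf8") as f:  # hypothetical corpus file
    docs = f.readlines()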
import os
import json
import pickle
import yaml
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, activations, models, preprocessing, utils
from tensorflow.keras.callbacks import ModelCheckpoint
# Importing Libraries
import os
import numpy as np
import cv2
import argparse
import time
from tqdm import tqdm

# convert from Yolo_mark to OpenCV format: YOLO stores a box as normalised
# (centre x, centre y, width, height); OpenCV wants pixel corner coordinates
def yoloFormattocv(x1, y1, x2, y2, H, W):
    bbox_width = x2 * W
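    # the capture ends after the first line; completion follows the
    # widely-used Yolo_mark -> OpenCV conversion
    bbox_height = y2 * H
    center_x = x1 * W
    center_y = y1 * H
    voc = []
    voc.append(center_x - (bbox_width / 2))
    voc.append(center_y - (bbox_height / 2))
    voc.append(center_x + (bbox_width / 2))
    voc.append(center_y + (bbox_height / 2))
    return [int(v) for v in voc]

# quick sanity check: a box centred in a 100 (H) x 200 (W) image,
# half the image size in each dimension
print(yoloFormattocv(0.5, 0.5, 0.5, 0.5, H=100, W=200))  # -> [50, 25, 150, 75]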