import re

def processTweet(chat):
    # Lower-case, then strip URLs, @mentions, extra whitespace,
    # hashtag signs (keeping the tag text) and stray punctuation.
    chat = chat.lower()
    chat = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', chat)
    chat = re.sub(r'@[^\s]+', '', chat)
    chat = re.sub(r'[\s]+', ' ', chat)
    chat = re.sub(r'#([^\s]+)', r'\1', chat)
    chat = re.sub(r'[\.!:\?\-\'\"\\/]', '', chat)
    chat = chat.strip('\'"')
    return chat
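
A quick check of the cleaner on a made-up tweet (expected output in the comment):

print(processTweet("Check this out! https://example.com @friend #DeepLearning"))
# -> 'check this out deeplearning'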
for _ in range(10):
    try:
        states_values = enc_model.predict(str_to_tokens(input('Enter question : ')))
        empty_target_seq = np.zeros((1, 1))
        empty_target_seq[0, 0] = tokenizer.word_index['start']
        stop_condition = False
        decoded_translation = ''
        while not stop_condition:
            dec_outputs, h, c = dec_model.predict([empty_target_seq] + states_values)
            # greedy decoding: pick the most likely next word
            sampled_word_index = np.argmax(dec_outputs[0, -1, :])
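            # the capture cuts off here; a plausible completion of the
            # standard greedy seq2seq decode loop
            word = tokenizer.index_word.get(sampled_word_index, '')
            if word == 'end' or len(decoded_translation.split()) > maxlen_answers:
                stop_condition = True  # stop on the end tag or at max length
            else:
                decoded_translation += ' ' + word
            # feed the sampled token and the fresh states back in
            empty_target_seq = np.zeros((1, 1))
            empty_target_seq[0, 0] = sampled_word_index
            states_values = [h, c]
        print(decoded_translation)
    except KeyError:
        # str_to_tokens raises KeyError on out-of-vocabulary words
        print('Sorry, that question contains unknown words.')

# str_to_tokens is called above but missing from this capture; a minimal
# sketch, assuming the tokenizer and question length from the data prep step:
def str_to_tokens(sentence):
    tokens = [tokenizer.word_index[w] for w in sentence.lower().split()]
    return preprocessing.sequence.pad_sequences(
        [tokens], maxlen=maxlen_questions, padding='post')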
def make_inference_models():
    # encoder model: maps a tokenised question to the final LSTM states
    encoder_model = tf.keras.models.Model(encoder_inputs, encoder_states)
    # the decoder is rewired to take the previous step's states as inputs,
    # so it can be stepped one token at a time during inference
    decoder_state_input_h = tf.keras.layers.Input(shape=(200,))
    decoder_state_input_c = tf.keras.layers.Input(shape=(200,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    # decoder_lstm / decoder_embedding come from the training graph,
    # which is not part of this capture
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_embedding, initial_state=decoder_states_inputs)
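    # the capture ends here; a plausible completion following the usual
    # seq2seq inference pattern (decoder_dense / decoder_inputs are also
    # assumed from the training graph)
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = tf.keras.models.Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs, state_h, state_c])
    return encoder_model, decoder_model

# the decode loop above consumes the pair as enc_model / dec_model
enc_model, dec_model = make_inference_models()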
# build the training arrays from the raw question/answer lists
Prepared_data = prepare_data(questions_for_token, answers_for_token)
encoder_input_data = Prepared_data[0]
decoder_input_data = Prepared_data[1]
decoder_output_data = Prepared_data[2]
maxlen_answers = Prepared_data[3]
nb_words = Prepared_data[4]
word_index = Prepared_data[5]
tokenizer = Prepared_data[6]

# GloVe-initialised embedding matrix for the vocabulary
embedding_matrix = emb_mat(nb_words)

# the encoder takes a variable-length sequence of word indices
encoder_inputs = tf.keras.layers.Input(shape=(None,))
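
The capture stops at the encoder input layer. A minimal sketch of the usual continuation, assuming 100-d GloVe vectors (matching emb_mat below) and the 200-unit LSTM implied by the inference state shapes:

encoder_embedding = tf.keras.layers.Embedding(
    nb_words, 100, weights=[embedding_matrix], trainable=False)(encoder_inputs)
encoder_outputs, state_h, state_c = tf.keras.layers.LSTM(
    200, return_state=True)(encoder_embedding)
encoder_states = [state_h, state_c]  # consumed by make_inference_models()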
def prepare_data(questions, answers):
    answers = pd.DataFrame(answers, columns=["Ans"])
    questions = pd.DataFrame(questions, columns=["Question"])
    # getFeatureVector (defined elsewhere in the gist) cleans and
    # tokenises each question
    questions["TokQues"] = questions["Question"].apply(getFeatureVector)
    answers = np.array(answers["Ans"])
    questions = np.array(questions["TokQues"])
    # wrap every answer in start/end markers; the decode loop above looks
    # up 'start' in the word index and stops on 'end'
    answers_with_tags = list()
    for i in range(len(answers)):
        answers_with_tags.append('start ' + str(answers[i]) + ' end')
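    # the capture ends here; a plausible completion that yields the 7-tuple
    # unpacked above as Prepared_data[0..6] (assumes tokenized_data, below,
    # returns the four sequence arrays)
    tokenizer = preprocessing.text.Tokenizer()
    tokenizer.fit_on_texts(list(questions) + answers_with_tags)
    nb_words = len(tokenizer.word_index) + 1
    word_index = tokenizer.word_index
    (encoder_input_data, decoder_input_data,
     decoder_output_data, maxlen_answers) = tokenized_data(
        questions, answers_with_tags, nb_words, tokenizer)
    return (encoder_input_data, decoder_input_data, decoder_output_data,
            maxlen_answers, nb_words, word_index, tokenizer)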
def tokenized_data(questions, answers, VOCAB_SIZE, tokenizer):
    import numpy as np
    from tensorflow.keras import preprocessing

    # encoder_input_data: tokenise the questions and pad them all to the
    # length of the longest question
    tokenized_questions = tokenizer.texts_to_sequences(questions)
    maxlen_questions = max([len(x) for x in tokenized_questions])
    padded_questions = preprocessing.sequence.pad_sequences(
        tokenized_questions, maxlen=maxlen_questions, padding='post')
    encoder_input_data = np.array(padded_questions)
    # print( encoder_input_data.shape , maxlen_questions )

    # decoder_input_data
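    # truncated in the capture; the decoder side presumably mirrors the
    # encoder: tokenise the tagged answers and pad to their max length
    tokenized_answers = tokenizer.texts_to_sequences(answers)
    maxlen_answers = max([len(x) for x in tokenized_answers])
    padded_answers = preprocessing.sequence.pad_sequences(
        tokenized_answers, maxlen=maxlen_answers, padding='post')
    decoder_input_data = np.array(padded_answers)

    # decoder_output_data: the same sequences shifted left by one step,
    # so the decoder is trained to predict the next word
    shifted_answers = [seq[1:] for seq in tokenized_answers]
    decoder_output_data = np.array(preprocessing.sequence.pad_sequences(
        shifted_answers, maxlen=maxlen_answers, padding='post'))
    return encoder_input_data, decoder_input_data, decoder_output_data, maxlen_answers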
def emb_mat(nb_words):
    EMBEDDING_FILE = "glove.6B.100d.txt"  # 100-d GloVe vectors

    def get_coefs(word, *arr):
        return word, np.asarray(arr, dtype='float32')

    # word -> vector lookup for every token in the GloVe file
    embeddings_index = dict(get_coefs(*o.strip().split())
                            for o in open(EMBEDDING_FILE, encoding="utf8"))
    all_embs = np.stack(list(embeddings_index.values()))
    emb_mean, emb_std = all_embs.mean(), all_embs.std()
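    # the capture stops here; a plausible completion (the common pattern:
    # initialise rows to the GloVe distribution, then copy in pretrained
    # vectors; word_index is the global built by prepare_data)
    embed_size = all_embs.shape[1]
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
    for word, i in word_index.items():
        if i < nb_words:
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector
    return embedding_matrix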
# chunks.py
questions_for_token = list()
answers_for_token = list()

# docs holds the raw corpus lines, one "question<TAB>answer" pair per line
c = 1
for con in docs:
    if c == 2868:
        pass  # skip record 2868 (presumably a malformed line)
    else:
        con = con.strip().split("\t")
        questions_for_token.append(con[0])
        answers_for_token.append(con[1])
    c = c + 1  # the counter must advance or the skip above never triggers
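
docs itself is never defined in this capture; a minimal sketch of how it is presumably read, with the file name purely hypothetical:

with open("dialogs.txt", encoding="utf8") as f:  # hypothetical corpus file
    docs = f.readlines()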
import os
import json
import pickle
import yaml
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, activations, models, preprocessing, utils
from tensorflow.keras.callbacks import ModelCheckpoint
# Importing Libraries
import os
import numpy as np
import cv2
import argparse
import time
from tqdm import tqdm

# convert from Yolo_mark to OpenCV format: YOLO stores a box as normalised
# (centre x, centre y, width, height); OpenCV wants pixel corner coordinates
def yoloFormattocv(x1, y1, x2, y2, H, W):
    bbox_width = x2 * W
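    # the capture ends after the first line; completion follows the
    # widely-used Yolo_mark -> OpenCV conversion
    bbox_height = y2 * H
    center_x = x1 * W
    center_y = y1 * H
    voc = []
    voc.append(center_x - (bbox_width / 2))
    voc.append(center_y - (bbox_height / 2))
    voc.append(center_x + (bbox_width / 2))
    voc.append(center_y + (bbox_height / 2))
    return [int(v) for v in voc]

# quick sanity check: a box centred in a 100 (H) x 200 (W) image,
# half the image size in each dimension
print(yoloFormattocv(0.5, 0.5, 0.5, 0.5, H=100, W=200))  # -> [50, 25, 150, 75]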