This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def processTweet(chat):
    """Return a lower-cased, de-noised copy of a chat/tweet string.

    The clean-up steps run in this order:

    1. lower-case the text;
    2. delete URLs (tokens starting with ``www.``, ``http://`` or
       ``https://``);
    3. delete ``@mention`` tokens;
    4. collapse every run of whitespace into a single space;
    5. turn ``#hashtag`` into ``hashtag``;
    6. delete the characters ``. ! : ? - ' " \\ /``.

    Whitespace is collapsed but never trimmed, so input that starts or ends
    with whitespace (or with a removed URL/mention followed by a space)
    yields a result with one leading/trailing space.  Because punctuation is
    removed after the whitespace collapse, deleting a free-standing
    punctuation mark can leave two adjacent spaces.

    Args:
        chat: The raw text to clean.

    Returns:
        The cleaned text.
    """
    chat = chat.lower()
    # Raw-string patterns: "\." and "\s" are not valid *string* escapes, so
    # non-raw literals trigger an invalid-escape warning on modern Python.
    chat = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', chat)
    chat = re.sub(r'@[^\s]+', '', chat)
    chat = re.sub(r'[\s]+', ' ', chat)
    chat = re.sub(r'#([^\s]+)', r'\1', chat)
    # Both quote characters are deleted here, so no separate quote-stripping
    # step is needed afterwards.
    chat = re.sub(r'[\.!:\?\-\'\"\\/]', r'', chat)
    return chat
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for _ in range(10): | |
try: | |
states_values = enc_model.predict( str_to_tokens( input( 'Enter question : ' ) ) ) | |
empty_target_seq = np.zeros( ( 1 , 1 ) ) | |
empty_target_seq[0, 0] = tokenizer.word_index['start'] | |
stop_condition = False | |
decoded_translation = '' | |
while not stop_condition : | |
dec_outputs , h , c = dec_model.predict([ empty_target_seq ] + states_values ) | |
sampled_word_index = np.argmax( dec_outputs[0, -1, :] ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_inference_models(): | |
encoder_model = tf.keras.models.Model(encoder_inputs, encoder_states) | |
decoder_state_input_h = tf.keras.layers.Input(shape=( 200 ,)) | |
decoder_state_input_c = tf.keras.layers.Input(shape=( 200 ,)) | |
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c] | |
decoder_outputs, state_h, state_c = decoder_lstm( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the data-preparation step once and keep its result under the same
# module-level name the rest of the script may refer to.
Prepared_data = prepare_data(questions_for_token, answers_for_token)

# Fan the first seven positional results out into individual names
# (indices 0..6, in this order).
(
    encoder_input_data,
    decoder_input_data,
    decoder_output_data,
    maxlen_answers,
    nb_words,
    word_index,
    tokenizer,
) = (Prepared_data[position] for position in range(7))

# Embedding matrix built from the vocabulary size returned above.
embedding_matrix = emb_mat(nb_words)

# Encoder input layer; shape=(None,) leaves the per-sample length unspecified.
encoder_inputs = tf.keras.layers.Input(shape=(None,))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def prepare_data(questions,answers): | |
answers=pd.DataFrame(answers, columns=["Ans"]) | |
questions=pd.DataFrame(questions, columns=["Question"]) | |
questions["TokQues"]=questions["Question"].apply(getFeatureVector) | |
answers=np.array(answers["Ans"]) | |
questions=np.array(questions["TokQues"]) | |
answers_with_tags = list() | |
for i in range( len( answers ) ): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def tokenized_data(questions,answers,VOCAB_SIZE,tokenizer): | |
# encoder_input_data | |
import numpy as np | |
tokenized_questions = tokenizer.texts_to_sequences( questions ) | |
maxlen_questions = max( [ len(x) for x in tokenized_questions ] ) | |
padded_questions = preprocessing.sequence.pad_sequences( tokenized_questions , maxlen=maxlen , padding='post' ) | |
encoder_input_data = np.array( padded_questions ) | |
#print( encoder_input_data.shape , maxlen_questions ) | |
# decoder_input_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def emb_mat(nb_words): | |
EMBEDDING_FILE="glove.6B.100d.txt" | |
def get_coefs(word,*arr): | |
return word, np.asarray(arr, dtype='float32') | |
embeddings_index = dict(get_coefs(*o.strip().split()) for o in open(EMBEDDING_FILE, encoding="utf8")) | |
all_embs = np.stack(embeddings_index.values()) | |
emb_mean,emb_std = all_embs.mean(), all_embs.std() | |
emb_mean,emb_std |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
questions_for_token = list() | |
answers_for_token = list() | |
c=1 | |
for con in docs: | |
if(c==2868): | |
pass | |
else: | |
con=con.strip().split("\t") | |
questions_for_token.append(con[0]) | |
answers_for_token.append(con[1]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tensorflow as tf | |
import pickle | |
from tensorflow.keras import layers , activations , models , preprocessing | |
from tensorflow.keras import preprocessing , utils | |
import os | |
import yaml | |
import json | |
import pandas as pd | |
from tensorflow.keras.callbacks import ModelCheckpoint |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Importing Libraries | |
import os | |
import numpy as np | |
import cv2 | |
import argparse | |
import time | |
from tqdm import tqdm | |
#convert from Yolo_mark to opencv format | |
def yoloFormattocv(x1, y1, x2, y2, H, W): | |
bbox_width = x2 * W |
NewerOlder