Skip to content

Instantly share code, notes, and snippets.

@vpeopleonatank
Last active May 11, 2021 13:33
Show Gist options
  • Save vpeopleonatank/69a16ec979d8349e4a53f940d742965d to your computer and use it in GitHub Desktop.
Save vpeopleonatank/69a16ec979d8349e4a53f940d742965d to your computer and use it in GitHub Desktop.
import re

import numpy as np
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from pydantic import BaseModel, Field
from tensorflow import keras
def tokenize(x):
    """Fit a new Keras ``Tokenizer`` on ``x`` and encode ``x`` with it.

    Args:
        x: The texts handed to ``Tokenizer.fit_on_texts`` and then to
            ``Tokenizer.texts_to_sequences``.

    Returns:
        A ``(sequences, tokenizer)`` tuple: the encoded form of ``x`` and
        the fitted tokenizer that produced it.
    """
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(x)
    sequences = tokenizer.texts_to_sequences(x)
    return sequences, tokenizer
def pad(x, length=None):
    """Pad the sequences in ``x`` at their end via ``pad_sequences``.

    Args:
        x: Sequences to pad.
        length: Value passed as ``maxlen``. When ``None``, the length of
            the longest sequence in ``x`` is used.

    Returns:
        The result of ``pad_sequences(x, maxlen=..., padding='post')``.
    """
    target_len = length if length is not None else max(len(seq) for seq in x)
    return pad_sequences(x, maxlen=target_len, padding='post')
def preprocess(x, y):
    """Tokenize and pad the source texts ``x`` and the target texts ``y``.

    Args:
        x: Source-side texts.
        y: Target-side texts (used as labels).

    Returns:
        ``(padded_x, padded_y, x_tokenizer, y_tokenizer)``. ``padded_y``
        carries one extra trailing axis of size 1.
    """
    x_sequences, x_tk = tokenize(x)
    y_sequences, y_tk = tokenize(y)

    padded_x = pad(x_sequences)
    padded_y = pad(y_sequences)

    # Keras's sparse_categorical_crossentropy function requires the labels
    # to be in 3 dimensions, so append a trailing axis of size 1.
    padded_y = padded_y.reshape(padded_y.shape + (1,))

    return padded_x, padded_y, x_tk, y_tk
# Tokenize and pad both corpora once, at import time.
# NOTE(review): english_sentences / vietnamese_sentences are not defined in
# the visible part of this file -- they must be provided before this runs.
(
    preproc_english_sentences,
    preproc_vietnamese_sentences,
    english_tokenizer,
    vietnamese_tokenizer,
) = preprocess(english_sentences, vietnamese_sentences)

# Reverse lookup (token id -> Vietnamese word) used to decode predictions.
y_id_to_word = {
    token_id: word
    for word, token_id in vietnamese_tokenizer.word_index.items()
}
# Id 0 is not in word_index; map it to an explicit placeholder
# (presumably the padding id written by pad_sequences -- confirm).
y_id_to_word[0] = '<PAD>'

# Model restored from the 'en_vi_1.h5' file, resolved relative to the
# current working directory.
translation_model = keras.models.load_model('en_vi_1.h5')
# Input schema for translate_eng_to_vi: a single required text field.
# Documented with comments rather than a class docstring so the schema
# pydantic generates for this model stays exactly as it was.
class EngToViTranslationInput(BaseModel):
    # Required (default is Ellipsis) and capped at 1000 characters.
    text: str = Field(
        default=...,
        title="Text Input",
        description="The input text to use as to translate text.",
        max_length=1000,
    )
# Output schema for translate_eng_to_vi.
# Documented with comments rather than a class docstring so the schema
# pydantic generates for this model stays exactly as it was.
class EngToViTranslationOutput(BaseModel):
    # Required field (default is Ellipsis) holding the produced text.
    generated_text: str = Field(default=...)
# Matches a '<PAD>' placeholder together with any whitespace around it, so
# padding can be removed from decoded model output.
# Written as a raw string: in a plain literal '\s' is an invalid escape
# sequence, which Python warns about and plans to reject.
patt = re.compile(r'(\s*)<PAD>(\s*)')
def translate_eng_to_vi(input: EngToViTranslationInput) -> EngToViTranslationOutput:
    """Translate English text to Vietnamese with the loaded Keras model.

    Args:
        input: Request model; its ``text`` field is the text to translate.

    Returns:
        An ``EngToViTranslationOutput`` whose ``generated_text`` is the
        decoded prediction with every ``<PAD>`` placeholder removed.
    """
    # Encode through the fitted tokenizer instead of indexing word_index by
    # hand: it applies the same normalisation used when fitting and skips
    # words outside the vocabulary rather than raising KeyError.
    token_ids = english_tokenizer.texts_to_sequences([input.text])

    # Pad to the width of the preprocessed English data.
    # NOTE(review): assumes this width equals the model's input length.
    model_input = pad_sequences(
        token_ids,
        maxlen=preproc_english_sentences.shape[-1],
        padding='post',
    )

    # Single-row batch; only predictions[0] is decoded.
    predictions = translation_model.predict(
        model_input, batch_size=len(model_input)
    )

    # For each output position pick the highest-scoring token id and map it
    # back to its word.
    words = [y_id_to_word[np.argmax(step)] for step in predictions[0]]
    res = patt.sub('', ' '.join(words))
    return EngToViTranslationOutput(generated_text=res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment