@AlexMikhalev
Created April 19, 2021 19:55
BERT QA on RedisAI inside RedisGears
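To use the gear, load the script into RedisGears with RG.PYEXECUTE before triggering it. A minimal sketch, assuming the gist is saved as qa_gear.py (a hypothetical file name) and that the bert-qa model and the pre-tokenized sentence tensors have already been loaded into RedisAI on each shard:

redis-cli RG.PYEXECUTE "$(cat qa_gear.py)" REQUIREMENTS numpy transformers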
### This gear answers questions with a BERT QA model served by RedisAI,
### using sentence tensors pre-computed (encoded) with the BERT tokenizer.
tokenizer = None

def loadTokeniser():
    global tokenizer
    from transformers import BertTokenizerFast
    tokenizer = BertTokenizerFast.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
    return tokenizer

def qa(record):
    log("Called with " + str(record))
    log("Trigger " + str(record[0]))
    log("Key " + str(record[1]))
    log("Question " + str(record[2]))
    global tokenizer
    import redisAI
    import numpy as np
    sentence_key = record[1]
    question = record[2]
    hash_tag = "{%s}" % hashtag()
    log("Shard_id " + hash_tag)
    if not tokenizer:
        tokenizer = loadTokeniser()
    # Pre-tokenized sentence tensor stored under this key by the encoding step
    token_key = f"tokenized:bert:qa:{sentence_key}"
    input_ids_question = tokenizer.encode(question, add_special_tokens=True, truncation=True, return_tensors="np")
    input_ids_context = redisAI.getTensorFromKey(token_key)
    input_ids = np.append(input_ids_question, input_ids_context)
    log(str(input_ids.shape))
    attention_mask = np.array([[1] * len(input_ids)])
    input_idss = np.array([input_ids])
    log(str(input_idss.shape))
    log("Attention mask shape " + str(attention_mask.shape))
    num_seg_a = input_ids_question.shape[1]
    log(str(num_seg_a))
    # num_seg_b = input_ids_context.shape[0]
    num_seg_b = redisAI.tensorGetDims(token_key)[0]
    log("Tensor get dims " + str(num_seg_b))
    # Segment ids: 0 for question tokens, 1 for context tokens; batched to match input_idss
    token_type_ids = np.array([[0] * num_seg_a + [1] * num_seg_b])
    log("Segments id " + str(token_type_ids.shape))
    modelRunner = redisAI.createModelRunner(f'bert-qa{hash_tag}')
    redisAI.modelRunnerAddInput(modelRunner, 'input_ids', input_idss)
    redisAI.modelRunnerAddInput(modelRunner, 'attention_mask', attention_mask)
    redisAI.modelRunnerAddInput(modelRunner, 'token_type_ids', token_type_ids)
    redisAI.modelRunnerAddOutput(modelRunner, 'answer_start_scores')
    redisAI.modelRunnerAddOutput(modelRunner, 'answer_end_scores')
    res = redisAI.modelRunnerRun(modelRunner)
    # redisAI.setTensorInKey('c{1}', res[0])
    log(str(res[0]))
    log("answer end " + str(res[1]))
    log(f"Model run on {hash_tag}")
    answer_start_scores = res[0]
    answer_end_scores = res[1]
    answer_start = np.argmax(answer_start_scores)
    answer_end = np.argmax(answer_end_scores) + 1
    # Full question + context token ids, used to decode the predicted answer span
    input_ids = input_ids.tolist()
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    return answer

gb = GB('CommandReader')
gb.map(qa)
gb.register(trigger='RunQABERT',mode="async_local")
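Once registered, the gear is invoked via RG.TRIGGER with the sentence key and the question as arguments (record[0] is the trigger name, record[1] the key, record[2] the question). Because registration uses mode="async_local", execution runs on the shard that receives the command. A hedged example, where the sentence key is hypothetical and is expected to have a matching tokenized:bert:qa:<key> tensor and a bert-qa{<shard hashtag>} model on that shard:

redis-cli RG.TRIGGER RunQABERT sentence:doc1:3 "What are the symptoms?"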