Skip to content

Instantly share code, notes, and snippets.

@AlexMikhalev
Created April 17, 2021 22:58
Show Gist options
  • Save AlexMikhalev/c36b1dd8f43ca1dc682b829ea252b3ad to your computer and use it in GitHub Desktop.
Save AlexMikhalev/c36b1dd8f43ca1dc682b829ea252b3ad to your computer and use it in GitHub Desktop.
QA bert interim working
# Module-level tokenizer cache: stays None until loadTokeniser() assigns the
# loaded BertTokenizerFast instance to it (qa() triggers that on first use).
tokenizer = None
import numpy as np
import torch
import os
# Select the RedisAI cluster endpoints. The DOCKER environment variable picks
# the profile: the default value 'local' targets 127.0.0.1, any other value
# targets the "rgcluster" host. Ports are the same in both profiles.
config_switch = os.getenv('DOCKER', 'local')
_cluster_host = "127.0.0.1" if config_switch == 'local' else "rgcluster"
startup_nodes = [
    {"host": _cluster_host, "port": port}
    for port in ("30001", "30002", "30003")
]

# Bind the name up front so it always exists; it remains None when the client
# library is missing or the cluster cannot be reached.
redisai_cluster_client = None
try:
    from redisai import ClusterClient
    redisai_cluster_client = ClusterClient(startup_nodes=startup_nodes)
except Exception:
    # Best effort: report and carry on. Catching Exception (rather than a bare
    # except) lets KeyboardInterrupt and SystemExit propagate normally.
    print("Redis Cluster is not available")
def loadTokeniser():
    """Load the BERT SQuAD tokenizer, cache it in the module global, and return it.

    The ``transformers`` import is done inside the function, so the package is
    only imported when the tokenizer is actually requested.
    """
    global tokenizer
    from transformers import BertTokenizerFast

    checkpoint = "bert-large-uncased-whole-word-masking-finetuned-squad"
    tokenizer = BertTokenizerFast.from_pretrained(checkpoint)
    return tokenizer
def qa(question, sentence_key, hash_tag):
    """Answer ``question`` against a pre-tokenized context stored in RedisAI.

    The question is tokenized here; the context token ids are fetched from the
    tensor stored under ``tokenized:bert:qa:{sentence_key}``. Both are joined
    into one sequence, written to RedisAI together with an attention mask and
    segment ids, and the ``bert-qa{hash_tag}`` model is run on them. The answer
    is the token span between the highest-scoring start and end positions.

    Args:
        question: Question text to encode.
        sentence_key: Key suffix identifying the stored context token tensor.
        hash_tag: Suffix appended to every tensor and model key used for this
            run (presumably a Redis Cluster hash tag so the keys share a
            slot -- confirm against the caller).

    Returns:
        The decoded answer string.
    """
    ### question is encoded
    ### use pre-computed context/answer text tensor
    global tokenizer
    if not tokenizer:
        tokenizer = loadTokeniser()

    token_key = f"tokenized:bert:qa:{sentence_key}"
    input_ids_question = tokenizer.encode(question, add_special_tokens=True, truncation=True, return_tensors="np")
    input_ids_context = redisai_cluster_client.tensorget(token_key)

    # np.append without an axis flattens both operands, so input_ids is 1-D:
    # question ids followed by context ids.
    input_ids = np.append(input_ids_question, input_ids_context)
    print(input_ids.shape)
    print(input_ids)
    attention_mask = np.array([[1] * len(input_ids)])
    # Add a leading batch dimension for the model input.
    input_idss = np.array([input_ids])
    print(input_idss.shape)
    print("Attention mask shape ", attention_mask.shape)

    # Segment ids: 0 for every question token, 1 for every context token.
    num_seg_a = input_ids_question.shape[1]
    print(num_seg_a)
    num_seg_b = input_ids_context.shape[0]
    print(num_seg_b)
    token_type_ids = np.array([0] * num_seg_a + [1] * num_seg_b)
    print("Segments id", token_type_ids.shape)

    redisai_cluster_client.tensorset(f'input_ids{hash_tag}', input_idss)
    redisai_cluster_client.tensorset(f'attention_mask{hash_tag}', attention_mask)
    redisai_cluster_client.tensorset(f'token_type_ids{hash_tag}', token_type_ids)
    redisai_cluster_client.modelrun(
        f'bert-qa{hash_tag}',
        [f'input_ids{hash_tag}', f'attention_mask{hash_tag}', f'token_type_ids{hash_tag}'],
        [f'answer_start_scores{hash_tag}', f'answer_end_scores{hash_tag}'],
    )
    print(f"Model run on {hash_tag}")

    answer_start_scores = redisai_cluster_client.tensorget(f'answer_start_scores{hash_tag}')
    answer_end_scores = redisai_cluster_client.tensorget(f'answer_end_scores{hash_tag}')
    answer_start = np.argmax(answer_start_scores)
    # +1 because the end index is used as an exclusive slice bound below.
    answer_end = np.argmax(answer_end_scores) + 1

    # Slice the ids that were actually sent to the model. The previous code
    # read an undefined ``inputs`` dict here and raised NameError on every call.
    answer_ids = input_ids[answer_start:answer_end].tolist()
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))
    return answer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment