Skip to content

Instantly share code, notes, and snippets.

@thiagomarzagao
Created April 16, 2020 22:59
Show Gist options
  • Save thiagomarzagao/77e641fe347fabcb5bd46111ec3316fe to your computer and use it in GitHub Desktop.
Save thiagomarzagao/77e641fe347fabcb5bd46111ec3316fe to your computer and use it in GitHub Desktop.
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
# Named-entity tagging demo: tokenize a Portuguese sentence, run it through a
# token-classification model and print one (token, label) pair per input id.

# Checkpoint used for both the tokenizer and the model.
# NOTE(review): confirm this checkpoint ships a fine-tuned token-classification
# head whose label order matches label_list below; if it does not, the labels
# printed at the end are not meaningful.
MODEL_NAME = 'neuralmind/bert-large-portuguese-cased'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Index -> tag name. The original list skipped 'I-ORG' and 'B-LOC' (the comment
# on 'B-ORG' described a location), which shifted every later index.
label_list = [
    'O',       # outside of a named entity
    'B-MISC',  # beginning of a miscellaneous entity right after another miscellaneous entity
    'I-MISC',  # miscellaneous entity
    'B-PER',   # beginning of a person's name right after another person's name
    'I-PER',   # person's name
    'B-ORG',   # beginning of an organisation right after another organisation
    'I-ORG',   # organisation
    'B-LOC',   # beginning of a location right after another location
    'I-LOC',   # location
]

text = 'Luiz Henrique Mandetta (Campo Grande, 30 de novembro de 1964) é um médico ortopedista e político brasileiro. Foi deputado federal e atualmente é Ministro da Saúde no governo de Jair Bolsonaro.'

# Encode once, then recover the token strings from those exact ids so tokens
# and predictions stay aligned one-to-one. An encode -> decode -> tokenize
# round trip is not guaranteed to reproduce the ids that reach the model.
inputs = tokenizer.encode(text, return_tensors='pt')
tokens = tokenizer.convert_ids_to_tokens(inputs[0].tolist())

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(inputs)[0]

# Pick the highest-scoring label index along the last dimension for each token.
predictions = torch.argmax(outputs, dim=2)

print([(token, label_list[prediction]) for token, prediction in zip(tokens, predictions[0].tolist())])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment