Skip to content

Instantly share code, notes, and snippets.

@theorm
Last active January 28, 2020 19:42
Show Gist options
  • Save theorm/69da31c5aa86c5b6e11e0e4a53a93c42 to your computer and use it in GitHub Desktop.
Save theorm/69da31c5aa86c5b6e11e0e4a53a93c42 to your computer and use it in GitHub Desktop.
from flair.data import Sentence
from flair.models import SequenceTagger
# Script: run the flair 'ner' sequence tagger over every line of an input
# text file and write each line, followed by the entities found in it, to an
# output file.

# Load the tagger registered under the name 'ner'.
tagger = SequenceTagger.load('ner')

filename = 'ts.txt'
out_filename = 'ts_with_entities.txt'

# Number of input lines processed so far; stays 0 when the input is empty.
cnt = 0

# Both files are opened explicitly as UTF-8: the output contains an emoji
# marker, which platform-default encodings such as cp1252 cannot encode and
# would otherwise fail on with UnicodeEncodeError.
with open(filename, encoding='utf-8') as f, \
        open(out_filename, 'w', encoding='utf-8') as of:
    # Iterate the file object directly so lines are streamed instead of the
    # whole file being loaded into memory first.
    for cnt, line in enumerate(f, start=1):
        line = line.rstrip('\n')

        # run model
        sentence = Sentence(line)
        tagger.predict(sentence)

        # extract entities as "text (tag)" pairs
        entities = [
            f'{span.text} ({span.tag})' for span in sentence.get_spans('ner')
        ]
        entities_string = ', '.join(entities)

        # write the original line, then the entities found in it
        of.write(f'{line}\n')
        of.write(f'😁: {entities_string}\n')

        # periodic progress report
        if cnt % 100 == 0:
            print(f'Processed {cnt} lines')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment