# forte sample
# Install first
# conda create --name forte python=3.9 pip
# pip install forte
# pip install forte.nltk
# pip install forte.spacy
# If you want to run this in a notebook:
# pip install jupyterlab
# Find our medium blog at: medium.com/casl-project/
# Code adapted from: https://medium.com/casl-project/building-a-question-answering-system-part-1-query-understanding-in-18-lines-916110f9f2b2
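# Note: the NLTK-based processors rely on NLTK data packages (e.g. tokenizer and
# tagger models). If NLTK raises a LookupError at runtime, download the missing
# resource with nltk.download(...).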
from forte import Pipeline
from forte.data.readers import TerminalReader
from fortex.nltk.nltk_processors import (
    NLTKSentenceSegmenter,
    NLTKWordTokenizer,
    NLTKPOSTagger,
    NLTKLemmatizer,
)
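# Build the first pipeline: TerminalReader pulls raw text typed into the terminal,
# then the NLTK processors run in order: sentence segmentation, word tokenization,
# POS tagging, and lemmatization.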
nlp = (
    Pipeline()
    .set_reader(TerminalReader())
    .add(NLTKSentenceSegmenter())
    .add(NLTKWordTokenizer())
    .add(NLTKPOSTagger())
    .add(NLTKLemmatizer())
)
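# A pipeline must be initialized after all components are added and before it
# processes any data.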
nlp.initialize()
from ft.onto.base_ontology import Token, Sentence
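# process() asks for a line of input on the terminal (that is what TerminalReader
# is for) and returns a DataPack holding the text plus every annotation the
# processors added.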
data_pack = nlp.process()
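# Iterate over the annotations: Sentence and Token are ontology types, and the
# `components` argument restricts the query to entries created by a specific
# processor (here, the NLTK word tokenizer).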
for sent in data_pack.get(Sentence):
    print("Results for every sentence:")
    for token in data_pack.get(
        Token, sent, components=["fortex.nltk.nltk_processors.NLTKWordTokenizer"]
    ):
        print(f"  text: {token.text}, pos: {token.pos}, lemma: {token.lemma}")
nlp.save("temp.yml")
nlp2 = Pipeline()
nlp2.init_from_config_path("temp.yml")
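# Extend the reloaded pipeline: lowercase the text first, then run spaCy on top
# of the NLTK components for sentences, tokens, POS, lemmas, and NER.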
from forte.processors.misc import LowerCaserProcessor
from fortex.spacy import SpacyProcessor
nlp2.add(LowerCaserProcessor())
nlp2.add(
    SpacyProcessor(),
    {"processors": ["sentence", "tokenize", "pos", "lemma", "ner"]},
)
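# Initialize the extended pipeline before calling process() on it.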
nlp2.initialize()
data_pack = nlp2.process()
from ft.onto.base_ontology import EntityMention
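# Mix annotations from different components: sentences from the NLTK segmenter,
# entity mentions produced by spaCy's NER inside each sentence, and the spaCy
# tokens covered by each entity mention.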
for sent in data_pack.get(
    Sentence, components="fortex.nltk.nltk_processors.NLTKSentenceSegmenter"
):
    print("Show sentence:")
    print(sent.text)
    for entity_mention in sent.get(EntityMention):
        print(" - Show entity mention:")
        print(f" - Entity Mention in this sentence is {entity_mention.text}, {entity_mention.ner_type}")
        for token in entity_mention.get(
            Token, components="fortex.spacy.spacy_processors.SpacyProcessor"
        ):
            print(f"   - Token in this Entity Mention: {token.text} with pos {token.pos}")