Instantly share code, notes, and snippets.

Embed
What would you like to do?
Named Entity Recognition
# Give the JVM a 1 GB maximum heap. The flag has to be spelled "-Xmx1024m"
# (no space after the dash), and it only takes effect when set before rJava
# starts the JVM, so this line stays ahead of the library() calls.
options(java.parameters = "-Xmx1024m")
#load libraries
library(openxlsx)
library(rJava)
library(NLP)
library(openNLP)
library(RWeka)
# Sample sentence to run the recognizers on
text <- "My name is Catherine Gitau, I work at Ongair Limited in Nairobi, Kenya"

# Join the input into a single string, elements separated by one space
text <- paste(text, collapse = " ")
print(text)

# Turn the character string into an NLP String object for annotation
text <- as.String(text)

# Word and sentence tokenizers
word_ann <- Maxent_Word_Token_Annotator()
sent_ann <- Maxent_Sent_Token_Annotator()

# First pass: locate sentences, then words, and peek at the result
text_annotations <- annotate(text, list(sent_ann, word_ann))
head(text_annotations)

# Bundle the text with its annotations and list the word tokens
text_doc <- AnnotatedPlainTextDocument(text, text_annotations)
words(text_doc)

# One entity annotator per kind of interest
person_ann <- Maxent_Entity_Annotator(kind = "person")
location_ann <- Maxent_Entity_Annotator(kind = "location")
organization_ann <- Maxent_Entity_Annotator(kind = "organization")

# Annotators are applied in list order: tokenizers first, then the
# entity recognizers
pipeline <- list(
  sent_ann,
  word_ann,
  person_ann,
  location_ann,
  organization_ann
)

# Second pass: rerun with the full pipeline; this replaces both the
# annotations and the annotated document built above
text_annotations <- annotate(text, pipeline)
text_doc <- AnnotatedPlainTextDocument(text, text_annotations)
# Extract entities from an AnnotatedPlainTextDocument.
#
# Arguments:
#   text  An annotated document; its `content` element is the annotated
#         string and its annotation set marks the spans.
#   kind  Optional entity kind to keep (e.g. "person"). When omitted, every
#         annotation whose type is "entity" is returned.
#
# Returns the pieces of the document content covered by the selected
# annotations.
entities <- function(text, kind) {
  s <- text$content

  # NOTE(review): newer NLP releases appear to keep a single Annotation in the
  # `annotation` element of the document. When that element is absent, fall
  # back to the older annotations() accessor and take its first set.
  a <- text[["annotation"]]
  if (is.null(a)) {
    a <- annotations(text)[[1]]
  }

  if (missing(kind)) {
    return(s[a[a$type == "entity"]])
  }

  # Not every annotation carries a "kind" feature (sentences and words do
  # not). Map those to NA so they can never match, instead of letting a NULL
  # be coerced to the string "NULL" during comparison.
  k <- vapply(
    a$features,
    function(f) {
      value <- f[["kind"]]
      if (is.null(value)) NA_character_ else as.character(value)[1L]
    },
    character(1)
  )
  s[a[!is.na(k) & k == kind]]
}
# Show the spans tagged as person entities in the annotated document
entities(text_doc, kind = "person")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment