CateGitau/NER.r

## NER.r
# Give the JVM a 1 GB maximum heap. This has to be set before rJava starts the
# JVM (i.e. before the library() calls below), and the flag must be written
# without a space after the dash: "- Xmx1024m" is not a valid -Xmx option.
options(java.parameters = "-Xmx1024m")
#load libraries
library(openxlsx)
library(rJava)
library(NLP)
library(openNLP)
library(RWeka)

# Input text for the demo, flattened into a single space-separated string
text <- paste(
  c("My name is Catherine Gitau, I work at Ongair Limited in Nairobi, Kenya"),
  collapse = " "
)
print(text)

# Turn the plain character string into a String object (as.String)
text <- as.String(text)

# Tokenizers: one annotator for sentences, one for words
sent_ann <- Maxent_Sent_Token_Annotator()
word_ann <- Maxent_Word_Token_Annotator()

# Run the sentence annotator, then the word annotator, over the text and
# show the first few resulting annotations
text_annotations <- annotate(text, list(sent_ann, word_ann))
head(text_annotations)

# Bundle the text with its annotations into one document and list its words
text_doc <- AnnotatedPlainTextDocument(text, text_annotations)
words(text_doc)

# One entity annotator per kind of interest: people, places, organizations
person_ann <- Maxent_Entity_Annotator(kind = "person")
location_ann <- Maxent_Entity_Annotator(kind = "location")
organization_ann <- Maxent_Entity_Annotator(kind = "organization")

# Annotators listed in the order they are applied: the two tokenizers first,
# followed by the three entity annotators
pipeline <- list(
  sent_ann, word_ann,
  person_ann, location_ann, organization_ann
)

# Re-annotate the text with the full pipeline and rebuild the document,
# replacing the tokens-only versions of both objects
text_annotations <- annotate(text, pipeline)
text_doc <- AnnotatedPlainTextDocument(text, text_annotations)

# Extract entities from an AnnotatedPlainTextDocument.
#
# Args:
#   text: an annotated document. Its `content` element and the first element
#         of `annotations(text)` are used.
#   kind: optional entity kind, e.g. "person". When supplied, only annotations
#         whose "kind" feature equals it are selected; when omitted, every
#         annotation of type "entity" is selected.
#
# Returns:
#   The parts of the document content covered by the selected annotations.
entities <- function(text, kind) {
  s <- text$content
  a <- annotations(text)[[1]]

  # missing() is base R, so this check does not need the methods package
  if (missing(kind)) {
    return(s[a[a$type == "entity"]])
  }

  # Annotations without a "kind" feature (presumably the sentence and word
  # tokens) are mapped to NA so the result is always a character vector
  # rather than a ragged list, and can never match the requested kind.
  kinds <- vapply(
    a$features,
    function(f) {
      value <- f[["kind"]]
      if (is.null(value)) NA_character_ else as.character(value)[1L]
    },
    character(1)
  )
  s[a[!is.na(kinds) & kinds == kind]]
}

# Select the annotations whose kind is "person" from the annotated document.
# The value is not assigned, so it is only shown via R's top-level printing.
entities(text_doc, kind = "person")
	# NOTE(review): from here on the file repeats the script above, tab-indented.
	# rJava reads java.parameters when the JVM starts; the earlier library() and
	# annotator calls have presumably started it already, so this setting likely
	# has no effect the second time -- confirm and consider dropping the repeat.
	# The flag must be written without a space after the dash ("-Xmx1024m").
	options(java.parameters = "-Xmx1024m")
	#load libraries
	library(openxlsx)
	library(rJava)
	library(NLP)
	library(openNLP)
	library(RWeka)

	# Input text for the demo, flattened into a single space-separated string
	text <- paste(
	  c("My name is Catherine Gitau, I work at Ongair Limited in Nairobi, Kenya"),
	  collapse = " "
	)
	print(text)

	# Turn the plain character string into a String object (as.String)
	text <- as.String(text)

	# Tokenizers: one annotator for sentences, one for words
	sent_ann <- Maxent_Sent_Token_Annotator()
	word_ann <- Maxent_Word_Token_Annotator()

	# Run the sentence annotator, then the word annotator, over the text and
	# show the first few resulting annotations
	text_annotations <- annotate(text, list(sent_ann, word_ann))
	head(text_annotations)

	# Bundle the text with its annotations into one document and list its words
	text_doc <- AnnotatedPlainTextDocument(text, text_annotations)
	words(text_doc)

	# One entity annotator per kind of interest: people, places, organizations
	person_ann <- Maxent_Entity_Annotator(kind = "person")
	location_ann <- Maxent_Entity_Annotator(kind = "location")
	organization_ann <- Maxent_Entity_Annotator(kind = "organization")

	# Annotators listed in the order they are applied: the two tokenizers first,
	# followed by the three entity annotators
	pipeline <- list(
	  sent_ann, word_ann,
	  person_ann, location_ann, organization_ann
	)

	# Re-annotate the text with the full pipeline and rebuild the document,
	# replacing the tokens-only versions of both objects
	text_annotations <- annotate(text, pipeline)
	text_doc <- AnnotatedPlainTextDocument(text, text_annotations)

	# Extract entities from an AnnotatedPlainTextDocument.
	#
	# Args:
	#   text: an annotated document. Its `content` element and the first element
	#         of `annotations(text)` are used.
	#   kind: optional entity kind, e.g. "person". When supplied, only
	#         annotations whose "kind" feature equals it are selected; when
	#         omitted, every annotation of type "entity" is selected.
	#
	# Returns:
	#   The parts of the document content covered by the selected annotations.
	entities <- function(text, kind) {
	  s <- text$content
	  a <- annotations(text)[[1]]

	  # missing() is base R, so this check does not need the methods package
	  if (missing(kind)) {
	    return(s[a[a$type == "entity"]])
	  }

	  # Annotations without a "kind" feature (presumably the sentence and word
	  # tokens) are mapped to NA so the result is always a character vector
	  # rather than a ragged list, and can never match the requested kind.
	  kinds <- vapply(
	    a$features,
	    function(f) {
	      value <- f[["kind"]]
	      if (is.null(value)) NA_character_ else as.character(value)[1L]
	    },
	    character(1)
	  )
	  s[a[!is.na(kinds) & kinds == kind]]
	}

	# Select the annotations whose kind is "person" from the annotated document.
	# The value is not assigned, so it is only shown via R's top-level printing.
	entities(text_doc, kind = "person")