Skip to content

Instantly share code, notes, and snippets.

@M4lF3s
Created September 18, 2020 08:39
Show Gist options
  • Save M4lF3s/7ba26be8b0225a40e7bb9b01f20f83e0 to your computer and use it in GitHub Desktop.
Spark NLP SentenceDetector
/**
 * Produces one sentence-level annotation per detected sentence, attaching a
 * pooled embedding vector to each.
 *
 * Sentences and per-token wordpiece embeddings are both unpacked from the
 * incoming annotations; each sentence's token embeddings are reduced to a
 * single vector via `calculateSentenceEmbeddings`, then paired positionally
 * (zip) with the corresponding sentence.
 *
 * NOTE(review): the pairing assumes `SentenceSplit.unpack` and
 * `WordpieceEmbeddingsSentence.unpack` yield sentences in the same order and
 * count — confirm against the upstream annotators.
 *
 * @param annotations input annotations containing sentence splits and
 *                    wordpiece token embeddings
 * @return one Annotation per sentence carrying the pooled sentence embedding
 */
override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
  val sentences = SentenceSplit.unpack(annotations)

  // Pool each sentence's token embeddings into one sentence-level vector.
  val sentenceVectors = WordpieceEmbeddingsSentence
    .unpack(annotations)
    .map(tokenSentence => calculateSentenceEmbeddings(tokenSentence.tokens.map(_.embeddings)))
    .toArray

  sentences.zip(sentenceVectors).map { case (sentence, vector) =>
    Annotation(
      annotatorType = outputAnnotatorType,
      begin = sentence.start,
      end = sentence.end,
      result = sentence.content,
      // "pieceId"/"isWordStart" mirror token-level metadata; fixed here since
      // the result is a whole sentence, not a wordpiece.
      metadata = Map(
        "sentence" -> sentence.index.toString,
        "token" -> sentence.content,
        "pieceId" -> "-1",
        "isWordStart" -> "true"
      ),
      embeddings = vector
    )
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment