Skip to content

Instantly share code, notes, and snippets.

@willprice76
willprice76 / pom-update.xml
Created Aug 27, 2020
POM update for training
View pom-update.xml
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
@willprice76
willprice76 / curl.sh
Last active Aug 27, 2020
Example POST request for training the sentiment model
View curl.sh
# Example request: POST labelled training samples as a JSON array to the training endpoint.
# The Content-Type header must be given explicitly: without a value, -H would consume
# "--data" as the header text, and curl's default body encoding for --data is form data, not JSON.
# Trailing backslashes join the lines into a single curl invocation.
curl -X POST -H "Content-Type: application/json" \
  --data "[{\"text\": \"I love Spark NLP\", \"label\": \"positive\"}, {\"text\": \"I hate using Scala objects in Java\", \"label\": \"negative\"}]" \
  http://localhost:8080/sentiment/train
@willprice76
willprice76 / train.java
Created Aug 27, 2020
controller endpoint to train sentiment model
View train.java
@PostMapping("/sentiment/train")
public String train(@RequestBody List<TextData> data) throws IOException {
Instant start = Instant.now();
Dataset<Row> input = spark.createDataFrame(data, TextData.class);
LOG.debug("Running training with {} rows of text data", data.size());
Pipeline pipeline = getSentimentTrainingPipeline();
PipelineModel newPipelineModel = pipeline.fit(input);
long trainingTime = Duration.between(start, Instant.now()).toMillis();
//Overwrite the existing scoring pipeline
scoringPipeline = new LightPipeline(newPipelineModel, false);
@willprice76
willprice76 / TextData.java
Created Aug 27, 2020
Class to represent training data
View TextData.java
package org.example.sparknlp;
public class TextData {
private String text;
private String label;
public TextData(String text, String label) {
this.text = text;
this.label = label;
}
@willprice76
willprice76 / getSentimentTrainingPipeline.java
Last active Aug 27, 2020
method to build training pipeline for sentiment analysis with Spark NLP
View getSentimentTrainingPipeline.java
private Pipeline getSentimentTrainingPipeline() {
DocumentAssembler document = new DocumentAssembler();
document.setInputCol("text");
document.setOutputCol("document");
String[] tokenizerInputCols = {"document"};
Tokenizer tokenizer = new Tokenizer();
tokenizer.setInputCols(tokenizerInputCols);
tokenizer.setOutputCol("token");
@willprice76
willprice76 / curl.sh
Last active Aug 27, 2020
Example POST request for scoring text data
View curl.sh
# Example request: POST a JSON array of texts to the scoring endpoint.
# Trailing backslashes join the lines into a single curl invocation; without them
# the --data argument and the URL would be run as separate shell commands.
curl -X POST -H "Content-Type: application/json" \
  --data "[\"I love Spark NLP\",\"I hate using Scala objects in Java\"]" \
  http://localhost:8080/sentiment/score
@willprice76
willprice76 / SparkNlpController.java
Last active Aug 27, 2020
Adding scoring to the Spark NLP controller
View SparkNlpController.java
package org.example.sparknlp;
import com.johnsnowlabs.nlp.LightPipeline;
import com.johnsnowlabs.nlp.SparkNLP;
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
@willprice76
willprice76 / SparkNlpController.java
Last active Aug 27, 2020
Adding Spark NLP initialization to controller
View SparkNlpController.java
package org.example.sparknlp;
import org.apache.spark.sql.SparkSession;
import com.johnsnowlabs.nlp.LightPipeline;
import com.johnsnowlabs.nlp.SparkNLP;
import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline;
import com.sun.org.slf4j.internal.Logger;
import com.sun.org.slf4j.internal.LoggerFactory;
import org.springframework.web.bind.annotation.GetMapping;
@willprice76
willprice76 / SparkNlpController.java
Created Aug 27, 2020
Stub controller for Spark NLP
View SparkNlpController.java
package org.example.sparknlp;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
@RestController
public class SparkNlpController {
@GetMapping("/hello")
public String hello() {
return "Hello world";
@willprice76
willprice76 / ExampleController.java
Created Aug 26, 2020
Basic controller to see if everything is working
View ExampleController.java
package org.example.sparknlp;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
@RestController
public class ExampleController {
@GetMapping("/hello")
public String hello() {
return "Hello world";