Skip to content

Instantly share code, notes, and snippets.

@dan-lind
Created August 3, 2016 19:28
Show Gist options
  • Save dan-lind/e7ed80b0d64b807ef32e68dbbfa58919 to your computer and use it in GitHub Desktop.
Save dan-lind/e7ed80b0d64b807ef32e68dbbfa58919 to your computer and use it in GitHub Desktop.
package kaggle;
import org.apache.commons.io.FilenameUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.analysis.DataAnalysis;
import org.datavec.api.transform.condition.string.StringRegexColumnCondition;
import org.datavec.api.transform.quality.DataQualityAnalysis;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.transform.string.ReplaceEmptyStringTransform;
import org.datavec.api.writable.DoubleWritable;
import org.datavec.api.writable.Writable;
import org.datavec.spark.transform.AnalyzeSpark;
import org.datavec.spark.transform.SparkTransformExecutor;
import org.datavec.spark.transform.misc.StringToWritablesFunction;
import java.util.List;
import java.util.Map;
/**
 * Simple example performing data quality analysis and basic cleaning on a passenger CSV file using DataVec on Spark.
 * Specifically:
 * - Load the CSV lines and parse them into records
 * - Count the occurrences of each "Embarked" value and compute the mean of the non-empty "Age" values
 * - Perform data quality analysis against the declared schema
 * - Replace empty "Age" entries with the mean age and empty "Embarked" entries with "S"
 * - Analyze the resulting data, and print some results
 *
 * The input is expected to have the twelve columns declared in the schema below
 * (PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked);
 * presumably this is the Kaggle Titanic training set -- confirm against the actual file.
 *
 * This class was adapted from the DataVec web log example, which is why {@link #DATA_URL} and
 * {@link #DATA_PATH} still exist; neither is referenced by the code in this class.
 *
 * Usage: the first command line argument, if present, is the path of the CSV file to read;
 * otherwise {@link #EXTRACTED_PATH} is used.
 *
 * @author Alex Black
 */
public class Analysis {

    /** Data URL of the original web log example; not referenced by this class, kept for compatibility. */
    public static final String DATA_URL = "ftp://ita.ee.lbl.gov/traces/NASA_access_log_Jul95.gz";

    /** Temp-directory location of the original web log example; not referenced by this class, kept for compatibility. */
    public static final String DATA_PATH = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "datavec_log_example/");

    /** Default path of the input CSV file, used when no path is passed on the command line. */
    public static final String EXTRACTED_PATH = "/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv";

    /** Zero-based position of the "Age" column in the schema declared in {@link #main(String[])}. */
    private static final int AGE_COLUMN = 5;

    /** Zero-based position of the "Embarked" column in the schema declared in {@link #main(String[])}. */
    private static final int EMBARKED_COLUMN = 11;

    /**
     * Runs the whole load / analyze / clean / analyze pipeline on a local Spark master and prints the results.
     *
     * @param args optional; {@code args[0]} overrides {@link #EXTRACTED_PATH} as the input CSV path
     * @throws IllegalStateException if the input contains no non-empty "Age" value, so no mean can be computed
     * @throws Exception             if Spark or DataVec fail, or the final sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : EXTRACTED_PATH;

        //Setup
        SparkConf conf = new SparkConf();
        conf.setMaster("local[*]");
        conf.setAppName("DataVec Analysis Data Example");
        JavaSparkContext sc = new JavaSparkContext(conf);

        //Everything that is printed after Spark has been stopped is collected into these locals first
        Schema finalDataSchema;
        long finalDataCount;
        List<List<Writable>> sample;
        DataAnalysis analysis;

        try {
            //=====================================================================
            // Step 1: Define the input data schema
            //=====================================================================
            Schema schema = new Schema.Builder()
                .addColumnInteger("PassengerId")
                .addColumnCategorical("Survived", "0", "1")
                .addColumnCategorical("Pclass", "1", "2", "3")
                .addColumnString("Name")
                .addColumnCategorical("Sex", "male", "female")
                .addColumnDouble("Age")
                .addColumnInteger("SibSp")
                .addColumnInteger("Parch")
                .addColumnString("Ticket")
                .addColumnString("Fare")
                .addColumnString("Cabin")
                .addColumnCategorical("Embarked", "C", "Q", "S")
                .build();

            //=====================================================================
            // Step 2: Load the raw data, initially as Strings
            //=====================================================================
            JavaRDD<String> rawLines = sc.textFile(inputPath);

            //=====================================================================
            // Step 3: Parse Raw Data and Perform Initial Analysis
            //=====================================================================
            // NOTE(review): every line is parsed as a data record -- confirm the input file has no header row.
            RecordReader rr = new CSVRecordReader();
            JavaRDD<List<Writable>> parsed = rawLines.map(new StringToWritablesFunction(rr));
            //Several actions below consume this RDD; cache it so the file is read and parsed only once
            parsed.cache();

            Map<Writable, Long> occurrences = parsed.map(x -> x.get(EMBARKED_COLUMN)).countByValue();
            occurrences.forEach((x, y) -> System.out.println(x.toString() + " : " + y.toString()));

            //Mean of the Age column, ignoring records whose Age field is empty
            JavaRDD<Writable> ages = parsed.map(x -> x.get(AGE_COLUMN)).filter(f -> !f.toString().isEmpty());
            long ageCount = ages.count();
            if (ageCount == 0) {
                //reduce() on an empty RDD fails with an unhelpful "empty collection" error; fail clearly instead
                throw new IllegalStateException("No non-empty Age values found in " + inputPath);
            }
            double sum = ages.map(y -> y.toDouble()).reduce((a, b) -> a + b);
            //Kept as a double so the division and the printed value stay exactly as before
            double count = ageCount;
            double mean = sum / count;
            System.out.println("Sum of Age: " + sum);
            System.out.println("Count of Age: " + count);
            System.out.println("Mean of Age: " + mean);

            //Now, let's check the quality, so we know if there's anything we need to clean up first...
            DataQualityAnalysis dqa = AnalyzeSpark.analyzeQuality(schema, parsed);
            System.out.println("----- Data Quality -----");
            System.out.println(dqa);

            //=====================================================================
            // Step 4: Perform Cleaning
            //=====================================================================
            TransformProcess tp = new TransformProcess.Builder(schema)
                //Replace empty "Age" entries (regex ^$) with the mean age computed above
                .conditionalReplaceValueTransform("Age", new DoubleWritable(mean), new StringRegexColumnCondition("Age", "^$"))
                //Replace empty "Embarked" entries with "S"
                .transform(new ReplaceEmptyStringTransform("Embarked", "S"))
                .build();
            SparkTransformExecutor executor = new SparkTransformExecutor();
            JavaRDD<List<Writable>> processed = executor.execute(parsed, tp);
            processed.cache();

            //=====================================================================
            // Step 5: Perform Analysis on Final Data
            //=====================================================================
            finalDataSchema = tp.getFinalSchema();
            finalDataCount = processed.count();
            sample = processed.take(10);
            analysis = AnalyzeSpark.analyze(finalDataSchema, processed);
        } finally {
            //Always release the Spark context, also when one of the steps above throws
            sc.stop();
        }

        Thread.sleep(4000); //Give spark some time to shut down (and stop spamming console)

        //=====================================================================
        // Step 6: Display Results
        //=====================================================================
        System.out.println("----- Final Data Schema -----");
        System.out.println(finalDataSchema);
        System.out.println("\n\nFinal data count: " + finalDataCount);
        System.out.println("\n\n----- Samples of final data -----");
        for (List<Writable> l : sample) {
            System.out.println(l);
        }
        System.out.println("\n\n----- Analysis -----");
        System.out.println(analysis);
    }
}
/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/bin/java -Didea.launcher.port=7545 "-Didea.launcher.bin.path=/Applications/IntelliJ IDEA CE.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath "/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/deploy.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/jfxrt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/javaws.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfxswt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/plugin.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/ant-javafx
.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/dt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/javafx-mx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/jconsole.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/sa-jdi.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/lib/tools.jar:/Users/danlin/git/dl4j-lab/target/classes:/Users/danlin/.m2/repository/org/deeplearning4j/deeplearning4j-nlp/0.4.0/deeplearning4j-nlp-0.4.0.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-analyzers-common/5.3.1/lucene-analyzers-common-5.3.1.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-core/5.3.1/lucene-core-5.3.1.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-queryparser/5.3.1/lucene-queryparser-5.3.1.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-queries/5.3.1/lucene-queries-5.3.1.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-sandbox/5.3.1/lucene-sandbox-5.3.1.jar:/Users/danlin/.m2/repository/org/apache/directory/studio/org.apache.commons.codec/1.8/org.apache.commons.codec-1.8.jar:/Users/danlin/.m2/repository/commons-codec/commons-codec/1.8/commons-codec-1.8.jar:/Users/danlin/.m2/repository/it/unimi/dsi/dsiutils/2.2.2/dsiutils-2.2.2.jar:/Users/danlin/.m2/repository/it/unimi/dsi/fastutil/6.5.15/fastutil-6.5.15.jar:/Users/danlin/.m2/repository/com/martiansoftware/jsap/2.1/jsap-2.1.jar:/Users/danlin/.m2/repository/commons-configuration/commons-configuration/1.8/commons-configuration-1.8.jar:/Users/danlin/.m2/repository/commons-lang/commons-lang/2.6/commons-lang-2.6.jar:/Users/danlin/.m2/repository/commons-logging/commons-logging/1.1.1/commons-logging-1.1.1.jar:/Users/danlin/.m2/repository/commons-collections/commons-collections/20040616/commons-collections-20040616.jar:/Users/danlin/.m2/repository/org/cleartk/cleartk-snowball/2.0.0/cleartk-snowball-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/lucene/lucene-snowball
/3.0.3/lucene-snowball-3.0.3.jar:/Users/danlin/.m2/repository/org/cleartk/cleartk-util/2.0.0/cleartk-util-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/uima/uimaj-core/2.5.0/uimaj-core-2.5.0.jar:/Users/danlin/.m2/repository/org/apache/uima/uimafit-core/2.0.0/uimafit-core-2.0.0.jar:/Users/danlin/.m2/repository/commons-logging/commons-logging-api/1.1/commons-logging-api-1.1.jar:/Users/danlin/.m2/repository/org/springframework/spring-core/3.1.2.RELEASE/spring-core-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/org/springframework/spring-asm/3.1.2.RELEASE/spring-asm-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/org/springframework/spring-context/3.1.2.RELEASE/spring-context-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/org/springframework/spring-aop/3.1.2.RELEASE/spring-aop-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/aopalliance/aopalliance/1.0/aopalliance-1.0.jar:/Users/danlin/.m2/repository/org/springframework/spring-expression/3.1.2.RELEASE/spring-expression-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/org/springframework/spring-beans/3.1.2.RELEASE/spring-beans-3.1.2.RELEASE.jar:/Users/danlin/.m2/repository/org/cleartk/cleartk-type-system/2.0.0/cleartk-type-system-2.0.0.jar:/Users/danlin/.m2/repository/org/cleartk/cleartk-opennlp-tools/2.0.0/cleartk-opennlp-tools-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/opennlp/opennlp-maxent/3.0.3/opennlp-maxent-3.0.3.jar:/Users/danlin/.m2/repository/org/apache/opennlp/opennlp-tools/1.5.3/opennlp-tools-1.5.3.jar:/Users/danlin/.m2/repository/net/sf/jwordnet/jwnl/1.3.3/jwnl-1.3.3.jar:/Users/danlin/.m2/repository/org/apache/opennlp/opennlp-uima/1.5.3/opennlp-uima-1.5.3.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-assets/0.8.0/dropwizard-assets-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-core/0.8.0/dropwizard-core-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-util/0.8.0/dropwizard-util-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-jackson/0.8.0/drop
wizard-jackson-0.8.0.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/datatype/jackson-datatype-jdk7/2.5.1/jackson-datatype-jdk7-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/datatype/jackson-datatype-guava/2.5.1/jackson-datatype-guava-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/module/jackson-module-afterburner/2.5.1/jackson-module-afterburner-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/datatype/jackson-datatype-joda/2.5.1/jackson-datatype-joda-2.5.1.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-validation/0.8.0/dropwizard-validation-0.8.0.jar:/Users/danlin/.m2/repository/org/hibernate/hibernate-validator/5.1.3.Final/hibernate-validator-5.1.3.Final.jar:/Users/danlin/.m2/repository/org/jboss/logging/jboss-logging/3.1.3.GA/jboss-logging-3.1.3.GA.jar:/Users/danlin/.m2/repository/com/fasterxml/classmate/1.0.0/classmate-1.0.0.jar:/Users/danlin/.m2/repository/org/glassfish/javax.el/3.0.0/javax.el-3.0.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-configuration/0.8.0/dropwizard-configuration-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-logging/0.8.0/dropwizard-logging-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-logback/3.1.0/metrics-logback-3.1.0.jar:/Users/danlin/.m2/repository/org/slf4j/log4j-over-slf4j/1.7.10/log4j-over-slf4j-1.7.10.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-util/9.2.9.v20150224/jetty-util-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-metrics/0.8.0/dropwizard-metrics-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-jersey/0.8.0/dropwizard-jersey-0.8.0.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/ext/jersey-metainf-services/2.16/jersey-metainf-services-2.16.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-jersey2/3.1.0/metrics-jersey2-3.1.0.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/jaxrs/jackson-jaxrs-json-provider/2.5.1/jackson-jaxrs-json-
provider-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/jaxrs/jackson-jaxrs-base/2.5.1/jackson-jaxrs-base-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/module/jackson-module-jaxb-annotations/2.5.1/jackson-module-jaxb-annotations-2.5.1.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-server/9.2.9.v20150224/jetty-server-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-io/9.2.9.v20150224/jetty-io-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-webapp/9.2.9.v20150224/jetty-webapp-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-xml/9.2.9.v20150224/jetty-xml-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-continuation/9.2.9.v20150224/jetty-continuation-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-jetty/0.8.0/dropwizard-jetty-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-jetty9/3.1.0/metrics-jetty9-3.1.0.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-servlet/9.2.9.v20150224/jetty-servlet-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-security/9.2.9.v20150224/jetty-security-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-servlets/9.2.9.v20150224/jetty-servlets-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/jetty-http/9.2.9.v20150224/jetty-http-9.2.9.v20150224.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-lifecycle/0.8.0/dropwizard-lifecycle-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-servlets/3.1.0/metrics-servlets-3.1.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-healthchecks/3.1.0/metrics-healthchecks-3.1.0.jar:/Users/danlin/.m2/repository/net/sourceforge/argparse4j/argparse4j/0.4.4/argparse4j-0.4.4.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/toolchain/setuid/jetty-setuid-java/1.0.2/jetty-setuid-java-1.0.2.jar:/Users/danlin/.m2/repository/io/drop
wizard/dropwizard-servlets/0.8.0/dropwizard-servlets-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-annotation/3.1.0/metrics-annotation-3.1.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-views-mustache/0.8.0/dropwizard-views-mustache-0.8.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-views/0.8.0/dropwizard-views-0.8.0.jar:/Users/danlin/.m2/repository/com/github/spullara/mustache/java/compiler/0.8.17/compiler-0.8.17.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-views-freemarker/0.8.0/dropwizard-views-freemarker-0.8.0.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-jackson/0.4.0/nd4j-jackson-0.4.0.jar:/Users/danlin/.m2/repository/org/deeplearning4j/deeplearning4j-core/0.4.0/deeplearning4j-core-0.4.0.jar:/Users/danlin/.m2/repository/org/nd4j/canova-nd4j-common/0.0.0.17/canova-nd4j-common-0.0.0.17.jar:/Users/danlin/.m2/repository/org/nd4j/canova-api/0.0.0.17/canova-api-0.0.0.17.jar:/Users/danlin/.m2/repository/org/slf4j/slf4j-api/1.7.12/slf4j-api-1.7.12.jar:/Users/danlin/.m2/repository/ch/qos/logback/logback-classic/1.1.2/logback-classic-1.1.2.jar:/Users/danlin/.m2/repository/ch/qos/logback/logback-core/1.1.2/logback-core-1.1.2.jar:/Users/danlin/.m2/repository/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1.jar:/Users/danlin/.m2/repository/commons-io/commons-io/2.4/commons-io-2.4.jar:/Users/danlin/.m2/repository/org/apache/commons/commons-compress/1.8/commons-compress-1.8.jar:/Users/danlin/.m2/repository/org/tukaani/xz/1.5/xz-1.5.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-api/0.4.0/nd4j-api-0.4.0.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-context/0.4.0/nd4j-context-0.4.0.jar:/Users/danlin/.m2/repository/org/apache/commons/commons-lang3/3.3.1/commons-lang3-3.3.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/core/jackson-core/2.5.1/jackson-core-2.5.1.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/core/jackson-databind/2.4.4/jackson-databind-2.4.4.jar:/Users/danlin/.m2/repository
/org/json/json/20131018/json-20131018.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/core/jackson-annotations/2.5.1/jackson-annotations-2.5.1.jar:/Users/danlin/.m2/repository/org/projectlombok/lombok/1.16.4/lombok-1.16.4.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/dataformat/jackson-dataformat-yaml/2.5.1/jackson-dataformat-yaml-2.5.1.jar:/Users/danlin/.m2/repository/org/yaml/snakeyaml/1.12/snakeyaml-1.12.jar:/Users/danlin/.m2/repository/org/datavec/datavec-nd4j-common/0.4.0/datavec-nd4j-common-0.4.0.jar:/Users/danlin/.m2/repository/org/datavec/datavec-data-image/0.4.0/datavec-data-image-0.4.0.jar:/Users/danlin/.m2/repository/org/deeplearning4j/deeplearning4j-ui/0.4.0/deeplearning4j-ui-0.4.0.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-client/0.8.0/dropwizard-client-0.8.0.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/core/jersey-client/2.16/jersey-client-2.16.jar:/Users/danlin/.m2/repository/javax/ws/rs/javax.ws.rs-api/2.0.1/javax.ws.rs-api-2.0.1.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/hk2-api/2.4.0-b09/hk2-api-2.4.0-b09.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/hk2-utils/2.4.0-b09/hk2-utils-2.4.0-b09.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/external/aopalliance-repackaged/2.4.0-b09/aopalliance-repackaged-2.4.0-b09.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/external/javax.inject/2.4.0-b09/javax.inject-2.4.0-b09.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/hk2-locator/2.4.0-b09/hk2-locator-2.4.0-b09.jar:/Users/danlin/.m2/repository/org/javassist/javassist/3.18.1-GA/javassist-3.18.1-GA.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-httpclient/3.1.0/metrics-httpclient-3.1.0.jar:/Users/danlin/.m2/repository/org/apache/httpcomponents/httpclient/4.3.5/httpclient-4.3.5.jar:/Users/danlin/.m2/repository/org/apache/httpcomponents/httpcore/4.3.2/httpcore-4.3.2.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/connectors/jersey-apache-connector/2.16/jersey-apache-connector-2.1
6.jar:/Users/danlin/.m2/repository/io/dropwizard/dropwizard-forms/0.8.0/dropwizard-forms-0.8.0.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/media/jersey-media-multipart/2.16/jersey-media-multipart-2.16.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/core/jersey-common/2.16/jersey-common-2.16.jar:/Users/danlin/.m2/repository/javax/annotation/javax.annotation-api/1.2/javax.annotation-api-1.2.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/bundles/repackaged/jersey-guava/2.16/jersey-guava-2.16.jar:/Users/danlin/.m2/repository/org/glassfish/hk2/osgi-resource-locator/1.0.1/osgi-resource-locator-1.0.1.jar:/Users/danlin/.m2/repository/org/jvnet/mimepull/mimepull/1.9.3/mimepull-1.9.3.jar:/Users/danlin/.m2/repository/com/google/guava/guava/19.0/guava-19.0.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-native/0.4.0/nd4j-native-0.4.0.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-native/0.4.0/nd4j-native-0.4.0-macosx-x86_64.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-native-api/0.4.0/nd4j-native-api-0.4.0.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp/1.2.2/javacpp-1.2.2.jar:/Users/danlin/.m2/repository/org/nd4j/canova-nd4j-image/0.0.0.17/canova-nd4j-image-0.0.0.17.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-buffer/0.4.0/nd4j-buffer-0.4.0.jar:/Users/danlin/.m2/repository/org/nd4j/nd4j-common/0.4.0/nd4j-common-0.4.0.jar:/Users/danlin/.m2/repository/org/reflections/reflections/0.9.10/reflections-0.9.10.jar:/Users/danlin/.m2/repository/com/google/code/findbugs/annotations/2.0.1/annotations-2.0.1.jar:/Users/danlin/.m2/repository/org/nd4j/canova-data-image/0.0.0.17/canova-data-image-0.0.0.17.jar:/Users/danlin/.m2/repository/com/github/jai-imageio/jai-imageio-core/1.3.0/jai-imageio-core-1.3.0.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-jpeg/3.1.1/imageio-jpeg-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-core/3.1.1/imageio-core-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-
metadata/3.1.1/imageio-metadata-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/common/common-lang/3.1.1/common-lang-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/common/common-io/3.1.1/common-io-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/common/common-image/3.1.1/common-image-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-tiff/3.1.1/imageio-tiff-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-psd/3.1.1/imageio-psd-3.1.1.jar:/Users/danlin/.m2/repository/com/twelvemonkeys/imageio/imageio-bmp/3.1.1/imageio-bmp-3.1.1.jar:/Users/danlin/.m2/repository/org/bytedeco/javacv/1.2/javacv-1.2.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/opencv/3.1.0-1.2/opencv-3.1.0-1.2.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/opencv/3.1.0-1.2/opencv-3.1.0-1.2-macosx-x86_64.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/ffmpeg/3.0.2-1.2/ffmpeg-3.0.2-1.2.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/ffmpeg/3.0.2-1.2/ffmpeg-3.0.2-1.2-macosx-x86_64.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/leptonica/1.73-1.2/leptonica-1.73-1.2.jar:/Users/danlin/.m2/repository/org/bytedeco/javacpp-presets/leptonica/1.73-1.2/leptonica-1.73-1.2-macosx-x86_64.jar:/Users/danlin/.m2/repository/org/nd4j/canova-nd4j-codec/0.0.0.17/canova-nd4j-codec-0.0.0.17.jar:/Users/danlin/.m2/repository/org/jcodec/jcodec/0.1.5/jcodec-0.1.5.jar:/Users/danlin/.m2/repository/org/deeplearning4j/arbiter-deeplearning4j/0.4.1-SNAPSHOT/arbiter-deeplearning4j-0.4.1-SNAPSHOT.jar:/Users/danlin/.m2/repository/org/deeplearning4j/arbiter-core/0.4.1-SNAPSHOT/arbiter-core-0.4.1-SNAPSHOT.jar:/Users/danlin/.m2/repository/args4j/args4j/2.33/args4j-2.33.jar:/Users/danlin/.m2/repository/org/deeplearning4j/deeplearning4j-ui-components/0.4.1-SNAPSHOT/deeplearning4j-ui-components-0.4.1-SNAPSHOT.jar:/Users/danlin/.m2/repository/org/datavec/datavec-api/0.4.0/datavec-api-0.4.0.jar:/Users/dan
lin/.m2/repository/joda-time/joda-time/2.9.2/joda-time-2.9.2.jar:/Users/danlin/.m2/repository/org/freemarker/freemarker/2.3.23/freemarker-2.3.23.jar:/Users/danlin/.m2/repository/org/datavec/datavec-spark_2.10/0.4.0/datavec-spark_2.10-0.4.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-core_2.11/1.5.1/spark-core_2.11-1.5.1.jar:/Users/danlin/.m2/repository/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-hadoop2.jar:/Users/danlin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7.jar:/Users/danlin/.m2/repository/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/Users/danlin/.m2/repository/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-tests.jar:/Users/danlin/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.13/jackson-core-asl-1.9.13.jar:/Users/danlin/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.13/jackson-mapper-asl-1.9.13.jar:/Users/danlin/.m2/repository/com/twitter/chill_2.11/0.5.0/chill_2.11-0.5.0.jar:/Users/danlin/.m2/repository/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar:/Users/danlin/.m2/repository/com/esotericsoftware/reflectasm/reflectasm/1.07/reflectasm-1.07-shaded.jar:/Users/danlin/.m2/repository/com/esotericsoftware/minlog/minlog/1.2/minlog-1.2.jar:/Users/danlin/.m2/repository/org/objenesis/objenesis/1.2/objenesis-1.2.jar:/Users/danlin/.m2/repository/com/twitter/chill-java/0.5.0/chill-java-0.5.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-client/2.2.0/hadoop-client-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-common/2.2.0/hadoop-common-2.2.0.jar:/Users/danlin/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2.jar:/Users/danlin/.m2/repository/org/apache/commons/commons-math/2.1/commons-math-2.1.jar:/Users/danlin/.m2/repository/xmlenc/xmlenc/0.52/xmlenc-0.52.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-auth/2.2.0/hadoop-auth-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-hdfs/2.2.0/hadoop-hdfs-2.2.0.jar:/Users/danlin/.m2/repository/org/mortbay/jet
ty/jetty-util/6.1.26/jetty-util-6.1.26.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-app/2.2.0/hadoop-mapreduce-client-app-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-common/2.2.0/hadoop-mapreduce-client-common-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-yarn-client/2.2.0/hadoop-yarn-client-2.2.0.jar:/Users/danlin/.m2/repository/com/google/inject/guice/3.0/guice-3.0.jar:/Users/danlin/.m2/repository/javax/inject/javax.inject/1/javax.inject-1.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-test-framework/jersey-test-framework-grizzly2/1.9/jersey-test-framework-grizzly2-1.9.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-test-framework/jersey-test-framework-core/1.9/jersey-test-framework-core-1.9.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-client/1.9/jersey-client-1.9.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-grizzly2/1.9/jersey-grizzly2-1.9.jar:/Users/danlin/.m2/repository/org/glassfish/grizzly/grizzly-http/2.1.2/grizzly-http-2.1.2.jar:/Users/danlin/.m2/repository/org/glassfish/grizzly/grizzly-framework/2.1.2/grizzly-framework-2.1.2.jar:/Users/danlin/.m2/repository/org/glassfish/gmbal/gmbal-api-only/3.0.0-b023/gmbal-api-only-3.0.0-b023.jar:/Users/danlin/.m2/repository/org/glassfish/external/management-api/3.0.0-b012/management-api-3.0.0-b012.jar:/Users/danlin/.m2/repository/org/glassfish/grizzly/grizzly-http-server/2.1.2/grizzly-http-server-2.1.2.jar:/Users/danlin/.m2/repository/org/glassfish/grizzly/grizzly-rcm/2.1.2/grizzly-rcm-2.1.2.jar:/Users/danlin/.m2/repository/org/glassfish/grizzly/grizzly-http-servlet/2.1.2/grizzly-http-servlet-2.1.2.jar:/Users/danlin/.m2/repository/org/glassfish/javax.servlet/3.1/javax.servlet-3.1.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-json/1.9/jersey-json-1.9.jar:/Users/danlin/.m2/repository/org/codehaus/jettison/jettison/1.1/jettison-1.1.jar:/Users/danlin/.m2/repository/stax/stax-api/1.0.1/stax-api-1.0
.1.jar:/Users/danlin/.m2/repository/com/sun/xml/bind/jaxb-impl/2.2.3-1/jaxb-impl-2.2.3-1.jar:/Users/danlin/.m2/repository/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar:/Users/danlin/.m2/repository/javax/activation/activation/1.1/activation-1.1.jar:/Users/danlin/.m2/repository/org/codehaus/jackson/jackson-jaxrs/1.8.3/jackson-jaxrs-1.8.3.jar:/Users/danlin/.m2/repository/org/codehaus/jackson/jackson-xc/1.8.3/jackson-xc-1.8.3.jar:/Users/danlin/.m2/repository/com/sun/jersey/contribs/jersey-guice/1.9/jersey-guice-1.9.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-yarn-server-common/2.2.0/hadoop-yarn-server-common-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-shuffle/2.2.0/hadoop-mapreduce-client-shuffle-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-yarn-api/2.2.0/hadoop-yarn-api-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-core/2.2.0/hadoop-mapreduce-client-core-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-yarn-common/2.2.0/hadoop-yarn-common-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-mapreduce-client-jobclient/2.2.0/hadoop-mapreduce-client-jobclient-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/hadoop/hadoop-annotations/2.2.0/hadoop-annotations-2.2.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-launcher_2.11/1.5.1/spark-launcher_2.11-1.5.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-network-common_2.11/1.5.1/spark-network-common_2.11-1.5.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-network-shuffle_2.11/1.5.1/spark-network-shuffle_2.11-1.5.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-unsafe_2.11/1.5.1/spark-unsafe_2.11-1.5.1.jar:/Users/danlin/.m2/repository/net/java/dev/jets3t/jets3t/0.7.1/jets3t-0.7.1.jar:/Users/danlin/.m2/repository/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar:/Users/danlin/.m2/repository/org/apache/curator/curator-recipes/2.4.0/curator-
recipes-2.4.0.jar:/Users/danlin/.m2/repository/org/apache/curator/curator-framework/2.4.0/curator-framework-2.4.0.jar:/Users/danlin/.m2/repository/org/apache/zookeeper/zookeeper/3.4.5/zookeeper-3.4.5.jar:/Users/danlin/.m2/repository/jline/jline/0.9.94/jline-0.9.94.jar:/Users/danlin/.m2/repository/org/eclipse/jetty/orbit/javax.servlet/3.0.0.v201112011016/javax.servlet-3.0.0.v201112011016.jar:/Users/danlin/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:/Users/danlin/.m2/repository/org/slf4j/jul-to-slf4j/1.7.10/jul-to-slf4j-1.7.10.jar:/Users/danlin/.m2/repository/org/slf4j/jcl-over-slf4j/1.7.10/jcl-over-slf4j-1.7.10.jar:/Users/danlin/.m2/repository/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar:/Users/danlin/.m2/repository/org/xerial/snappy/snappy-java/1.1.1.7/snappy-java-1.1.1.7.jar:/Users/danlin/.m2/repository/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar:/Users/danlin/.m2/repository/org/roaringbitmap/RoaringBitmap/0.4.5/RoaringBitmap-0.4.5.jar:/Users/danlin/.m2/repository/commons-net/commons-net/2.2/commons-net-2.2.jar:/Users/danlin/.m2/repository/com/typesafe/akka/akka-remote_2.11/2.3.11/akka-remote_2.11-2.3.11.jar:/Users/danlin/.m2/repository/com/typesafe/akka/akka-actor_2.11/2.3.11/akka-actor_2.11-2.3.11.jar:/Users/danlin/.m2/repository/com/typesafe/config/1.2.1/config-1.2.1.jar:/Users/danlin/.m2/repository/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar:/Users/danlin/.m2/repository/org/uncommons/maths/uncommons-maths/1.2.2a/uncommons-maths-1.2.2a.jar:/Users/danlin/.m2/repository/com/typesafe/akka/akka-slf4j_2.11/2.3.11/akka-slf4j_2.11-2.3.11.jar:/Users/danlin/.m2/repository/org/scala-lang/scala-library/2.11.7/scala-library-2.11.7.jar:/Users/danlin/.m2/repository/org/json4s/json4s-jackson_2.11/3.2.10/json4s-jackson_2.11-3.2.10.jar:/Users/danlin/.m2/repository/org/json4s/json4s-core_2.11/3.2.10/json4s-core_2.11-3.2.10.jar:/Users/danlin/.m2/repository/org/json4s/json4s-ast_2.11/3.2.10/json4s-ast_2.11-3.2.10.jar:/Users/danlin/.m2
/repository/org/scala-lang/scalap/2.11.0/scalap-2.11.0.jar:/Users/danlin/.m2/repository/org/scala-lang/scala-compiler/2.11.0/scala-compiler-2.11.0.jar:/Users/danlin/.m2/repository/org/scala-lang/modules/scala-xml_2.11/1.0.1/scala-xml_2.11-1.0.1.jar:/Users/danlin/.m2/repository/org/scala-lang/modules/scala-parser-combinators_2.11/1.0.1/scala-parser-combinators_2.11-1.0.1.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-server/1.9/jersey-server-1.9.jar:/Users/danlin/.m2/repository/asm/asm/3.1/asm-3.1.jar:/Users/danlin/.m2/repository/com/sun/jersey/jersey-core/1.9/jersey-core-1.9.jar:/Users/danlin/.m2/repository/org/apache/mesos/mesos/0.21.1/mesos-0.21.1-shaded-protobuf.jar:/Users/danlin/.m2/repository/io/netty/netty-all/4.0.29.Final/netty-all-4.0.29.Final.jar:/Users/danlin/.m2/repository/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2.jar:/Users/danlin/.m2/repository/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.11/2.4.4/jackson-module-scala_2.11-2.4.4.jar:/Users/danlin/.m2/repository/org/scala-lang/scala-reflect/2.11.2/scala-reflect-2.11.2.jar:/Users/danlin/.m2/repository/com/thoughtworks/paranamer/paranamer/2.6/paranamer-2.6.jar:/Users/danlin/.m2/repository/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar:/Users/danlin/.m2/repository/oro/oro/2.0.8/oro-2.0.8.jar:/Users/danlin/.m2/repository/org/tachyonproject/tachyon-client/0.7.1/tachyon-client-0.7.1.jar:/Users/danlin/.m2/repository/org/apache/curator/curator-client/2.1.0-incubating/curator-client-2.1.0-incubating.jar:/Users/danlin/.m2/repository/org/tachyonproject/tachyon-underfs-hdfs/0.7.1/tachyon-underfs-hdfs-0.7.1.jar:/Users/danlin/.m2/
repository/org/tachyonproject/tachyon-underfs-local/0.7.1/tachyon-underfs-local-0.7.1.jar:/Users/danlin/.m2/repository/net/razorvine/pyrolite/4.4/pyrolite-4.4.jar:/Users/danlin/.m2/repository/net/sf/py4j/py4j/0.8.2.1/py4j-0.8.2.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-mllib_2.10/2.0.0/spark-mllib_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-core_2.10/2.0.0/spark-core_2.10-2.0.0.jar:/Users/danlin/.m2/repository/com/twitter/chill_2.10/0.8.0/chill_2.10-0.8.0.jar:/Users/danlin/.m2/repository/com/esotericsoftware/kryo-shaded/3.0.3/kryo-shaded-3.0.3.jar:/Users/danlin/.m2/repository/com/esotericsoftware/minlog/1.3.0/minlog-1.3.0.jar:/Users/danlin/.m2/repository/org/apache/xbean/xbean-asm5-shaded/4.4/xbean-asm5-shaded-4.4.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-launcher_2.10/2.0.0/spark-launcher_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-network-common_2.10/2.0.0/spark-network-common_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-network-shuffle_2.10/2.0.0/spark-network-shuffle_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-unsafe_2.10/2.0.0/spark-unsafe_2.10-2.0.0.jar:/Users/danlin/.m2/repository/javax/servlet/javax.servlet-api/3.1.0/javax.servlet-api-3.1.0.jar:/Users/danlin/.m2/repository/org/json4s/json4s-jackson_2.10/3.2.11/json4s-jackson_2.10-3.2.11.jar:/Users/danlin/.m2/repository/org/json4s/json4s-core_2.10/3.2.11/json4s-core_2.10-3.2.11.jar:/Users/danlin/.m2/repository/org/json4s/json4s-ast_2.10/3.2.11/json4s-ast_2.10-3.2.11.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/core/jersey-server/2.22.2/jersey-server-2.22.2.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/media/jersey-media-jaxb/2.22.2/jersey-media-jaxb-2.22.2.jar:/Users/danlin/.m2/repository/javax/validation/validation-api/1.1.0.Final/validation-api-1.1.0.Final.jar:/Users/danlin/.m
2/repository/org/glassfish/jersey/containers/jersey-container-servlet/2.22.2/jersey-container-servlet-2.22.2.jar:/Users/danlin/.m2/repository/org/glassfish/jersey/containers/jersey-container-servlet-core/2.22.2/jersey-container-servlet-core-2.22.2.jar:/Users/danlin/.m2/repository/io/netty/netty/3.8.0.Final/netty-3.8.0.Final.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/module/jackson-module-scala_2.10/2.6.5/jackson-module-scala_2.10-2.6.5.jar:/Users/danlin/.m2/repository/com/fasterxml/jackson/module/jackson-module-paranamer/2.6.5/jackson-module-paranamer-2.6.5.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-streaming_2.10/2.0.0/spark-streaming_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-sql_2.10/2.0.0/spark-sql_2.10-2.0.0.jar:/Users/danlin/.m2/repository/com/univocity/univocity-parsers/2.1.1/univocity-parsers-2.1.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-sketch_2.10/2.0.0/spark-sketch_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-catalyst_2.10/2.0.0/spark-catalyst_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/codehaus/janino/janino/2.7.8/janino-2.7.8.jar:/Users/danlin/.m2/repository/org/codehaus/janino/commons-compiler/2.7.8/commons-compiler-2.7.8.jar:/Users/danlin/.m2/repository/org/antlr/antlr4-runtime/4.5.3/antlr4-runtime-4.5.3.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-column/1.7.0/parquet-column-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-common/1.7.0/parquet-common-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-encoding/1.7.0/parquet-encoding-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-generator/1.7.0/parquet-generator-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-hadoop/1.7.0/parquet-hadoop-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-format/2.3.0-incubating/parquet-format-2.3.0-incubating.jar:/Users/danlin/.m2/repository/org/apache/parquet/parquet-jackson/1.7.0/
parquet-jackson-1.7.0.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-graphx_2.10/2.0.0/spark-graphx_2.10-2.0.0.jar:/Users/danlin/.m2/repository/com/github/fommil/netlib/core/1.1.2/core-1.1.2.jar:/Users/danlin/.m2/repository/net/sourceforge/f2j/arpack_combined_all/0.1/arpack_combined_all-0.1.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-mllib-local_2.10/2.0.0/spark-mllib-local_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/scalanlp/breeze_2.10/0.11.2/breeze_2.10-0.11.2.jar:/Users/danlin/.m2/repository/org/scalanlp/breeze-macros_2.10/0.11.2/breeze-macros_2.10-0.11.2.jar:/Users/danlin/.m2/repository/org/scalamacros/quasiquotes_2.10/2.0.0-M8/quasiquotes_2.10-2.0.0-M8.jar:/Users/danlin/.m2/repository/net/sf/opencsv/opencsv/2.3/opencsv-2.3.jar:/Users/danlin/.m2/repository/com/github/rwl/jtransforms/2.4.0/jtransforms-2.4.0.jar:/Users/danlin/.m2/repository/org/spire-math/spire_2.10/0.7.4/spire_2.10-0.7.4.jar:/Users/danlin/.m2/repository/org/spire-math/spire-macros_2.10/0.7.4/spire-macros_2.10-0.7.4.jar:/Users/danlin/.m2/repository/org/jpmml/pmml-model/1.2.15/pmml-model-1.2.15.jar:/Users/danlin/.m2/repository/org/jpmml/pmml-schema/1.2.15/pmml-schema-1.2.15.jar:/Users/danlin/.m2/repository/org/apache/spark/spark-tags_2.10/2.0.0/spark-tags_2.10-2.0.0.jar:/Users/danlin/.m2/repository/org/scalatest/scalatest_2.10/2.2.6/scalatest_2.10-2.2.6.jar:/Users/danlin/.m2/repository/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar:/Applications/IntelliJ IDEA CE.app/Contents/lib/idea_rt.jar" com.intellij.rt.execution.application.AppMain kaggle.Analysis
16/08/03 21:25:08 INFO o.a.s.SparkContext: Running Spark version 1.5.1
16/08/03 21:25:10 INFO o.a.s.SecurityManager: Changing view acls to: danlin
16/08/03 21:25:10 INFO o.a.s.SecurityManager: Changing modify acls to: danlin
16/08/03 21:25:10 INFO o.a.s.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(danlin); users with modify permissions: Set(danlin)
16/08/03 21:25:15 INFO o.a.s.u.Utils: Successfully started service 'sparkDriver' on port 50596.
16/08/03 21:25:16 INFO o.a.s.SparkEnv: Registering MapOutputTracker
16/08/03 21:25:16 INFO o.a.s.SparkEnv: Registering BlockManagerMaster
16/08/03 21:25:16 INFO o.a.s.s.DiskBlockManager: Created local directory at /private/var/folders/tk/2t_ll0bs5ml458g35b118h3r0000gp/T/blockmgr-561b0c3b-e564-49b8-b471-2011a154666f
16/08/03 21:25:16 INFO o.a.s.s.MemoryStore: MemoryStore started with capacity 983.1 MB
16/08/03 21:25:16 INFO o.a.s.HttpFileServer: HTTP File server directory is /private/var/folders/tk/2t_ll0bs5ml458g35b118h3r0000gp/T/spark-2a892f8e-3eb9-4a60-ba0c-fb661129ba9e/httpd-61969d90-88f9-4b68-8817-adb98523e201
16/08/03 21:25:16 INFO o.a.s.HttpServer: Starting HTTP Server
16/08/03 21:25:16 INFO o.a.s.u.Utils: Successfully started service 'HTTP file server' on port 50597.
16/08/03 21:25:17 INFO o.a.s.SparkEnv: Registering OutputCommitCoordinator
16/08/03 21:25:17 INFO o.a.s.u.Utils: Successfully started service 'SparkUI' on port 4040.
16/08/03 21:25:17 INFO o.a.s.u.SparkUI: Started SparkUI at http://192.168.0.5:4040
16/08/03 21:25:18 WARN o.a.s.m.MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set.
16/08/03 21:25:18 INFO o.a.s.e.Executor: Starting executor ID driver on host localhost
16/08/03 21:25:19 INFO o.a.s.u.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 50602.
16/08/03 21:25:19 INFO o.a.s.n.n.NettyBlockTransferService: Server created on 50602
16/08/03 21:25:19 INFO o.a.s.s.BlockManagerMaster: Trying to register BlockManager
16/08/03 21:25:19 INFO o.a.s.s.BlockManagerMasterEndpoint: Registering block manager localhost:50602 with 983.1 MB RAM, BlockManagerId(driver, localhost, 50602)
16/08/03 21:25:19 INFO o.a.s.s.BlockManagerMaster: Registered BlockManager
16/08/03 21:25:20 INFO o.a.s.s.MemoryStore: ensureFreeSpace(110248) called with curMem=0, maxMem=1030823608
16/08/03 21:25:20 INFO o.a.s.s.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 107.7 KB, free 983.0 MB)
16/08/03 21:25:20 INFO o.a.s.s.MemoryStore: ensureFreeSpace(10065) called with curMem=110248, maxMem=1030823608
16/08/03 21:25:20 INFO o.a.s.s.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 9.8 KB, free 983.0 MB)
16/08/03 21:25:20 INFO o.a.s.s.BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:50602 (size: 9.8 KB, free: 983.1 MB)
16/08/03 21:25:20 INFO o.a.s.SparkContext: Created broadcast 0 from textFile at Analysis.java:85
16/08/03 21:25:22 INFO o.a.s.SparkContext: Starting job: countByValue at Analysis.java:95
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Registering RDD 5 (countByValue at Analysis.java:95)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Got job 0 (countByValue at Analysis.java:95) with 2 output partitions
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 1(countByValue at Analysis.java:95)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Parents of final stage: List(ShuffleMapStage 0)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Missing parents: List(ShuffleMapStage 0)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Submitting ShuffleMapStage 0 (MapPartitionsRDD[5] at countByValue at Analysis.java:95), which has no missing parents
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5968) called with curMem=120313, maxMem=1030823608
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 5.8 KB, free 982.9 MB)
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3333) called with curMem=126281, maxMem=1030823608
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 3.3 KB, free 982.9 MB)
16/08/03 21:25:22 INFO o.a.s.s.BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:50602 (size: 3.3 KB, free: 983.1 MB)
16/08/03 21:25:22 INFO o.a.s.SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 0 (MapPartitionsRDD[5] at countByValue at Analysis.java:95)
16/08/03 21:25:22 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 0.0 with 2 tasks
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:22 INFO o.a.s.e.Executor: Running task 0.0 in stage 0.0 (TID 0)
16/08/03 21:25:22 INFO o.a.s.e.Executor: Running task 1.0 in stage 0.0 (TID 1)
16/08/03 21:25:22 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:0+30111
16/08/03 21:25:22 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:30111+30111
16/08/03 21:25:22 INFO o.a.s.e.Executor: Finished task 1.0 in stage 0.0 (TID 1). 2254 bytes result sent to driver
16/08/03 21:25:22 INFO o.a.s.e.Executor: Finished task 0.0 in stage 0.0 (TID 0). 2254 bytes result sent to driver
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 482 ms on localhost (1/2)
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 544 ms on localhost (2/2)
16/08/03 21:25:22 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: ShuffleMapStage 0 (countByValue at Analysis.java:95) finished in 0,587 s
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: looking for newly runnable stages
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: running: Set()
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: waiting: Set(ResultStage 1)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: failed: Set()
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Missing parents for ResultStage 1: List()
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 1 (ShuffledRDD[6] at countByValue at Analysis.java:95), which is now runnable
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2256) called with curMem=129614, maxMem=1030823608
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: Block broadcast_2 stored as values in memory (estimated size 2.2 KB, free 982.9 MB)
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: ensureFreeSpace(1328) called with curMem=131870, maxMem=1030823608
16/08/03 21:25:22 INFO o.a.s.s.MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1328.0 B, free 982.9 MB)
16/08/03 21:25:22 INFO o.a.s.s.BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:50602 (size: 1328.0 B, free: 983.1 MB)
16/08/03 21:25:22 INFO o.a.s.SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 1 (ShuffledRDD[6] at countByValue at Analysis.java:95)
16/08/03 21:25:22 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 1.0 with 2 tasks
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 1.0 (TID 2, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 1.0 (TID 3, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:22 INFO o.a.s.e.Executor: Running task 0.0 in stage 1.0 (TID 2)
16/08/03 21:25:22 INFO o.a.s.e.Executor: Running task 1.0 in stage 1.0 (TID 3)
16/08/03 21:25:22 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:22 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 7 ms
16/08/03 21:25:22 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:22 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 10 ms
16/08/03 21:25:22 INFO o.a.s.e.Executor: Finished task 1.0 in stage 1.0 (TID 3). 1402 bytes result sent to driver
16/08/03 21:25:22 INFO o.a.s.e.Executor: Finished task 0.0 in stage 1.0 (TID 2). 1485 bytes result sent to driver
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 1.0 (TID 3) in 104 ms on localhost (1/2)
16/08/03 21:25:22 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 1.0 (TID 2) in 108 ms on localhost (2/2)
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: ResultStage 1 (countByValue at Analysis.java:95) finished in 0,109 s
16/08/03 21:25:22 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
16/08/03 21:25:22 INFO o.a.s.s.DAGScheduler: Job 0 finished: countByValue at Analysis.java:95, took 0,929643 s
Q : 77
C : 168
S : 644
: 2
16/08/03 21:25:22 INFO o.a.s.SparkContext: Starting job: reduce at Analysis.java:99
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 1 (reduce at Analysis.java:99) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 2(reduce at Analysis.java:99)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 2 (MapPartitionsRDD[9] at map at Analysis.java:99), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5728) called with curMem=133198, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_3 stored as values in memory (estimated size 5.6 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3100) called with curMem=138926, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 983.1 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 2 (MapPartitionsRDD[9] at map at Analysis.java:99)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 2.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 2.0 (TID 4, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 2.0 (TID 5, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 2.0 (TID 4)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 2.0 (TID 5)
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:0+30111
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:30111+30111
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 2.0 (TID 5). 2163 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 2.0 (TID 4). 2163 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 2.0 (TID 5) in 27 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 2.0 (TID 4) in 29 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 2 (reduce at Analysis.java:99) finished in 0,030 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 1 finished: reduce at Analysis.java:99, took 0,043801 s
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: count at Analysis.java:100
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 2 (count at Analysis.java:100) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 3(count at Analysis.java:100)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 3 (MapPartitionsRDD[11] at filter at Analysis.java:100), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5112) called with curMem=142026, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_4 stored as values in memory (estimated size 5.0 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2954) called with curMem=147138, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 2.9 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:50602 (size: 2.9 KB, free: 983.1 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 3 (MapPartitionsRDD[11] at filter at Analysis.java:100)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 3.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 3.0 (TID 6, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 3.0 (TID 7, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 3.0 (TID 7)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 3.0 (TID 6)
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:0+30111
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:30111+30111
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 3.0 (TID 7). 2082 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 3.0 (TID 6). 2082 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 3.0 (TID 7) in 23 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 3.0 (TID 6) in 25 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 3 (count at Analysis.java:100) finished in 0,025 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 2 finished: count at Analysis.java:100, took 0,036531 s
Sum of Age: 21205.17
Count of Age: 714.0
Mean of Age: 29.69911764705882
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:624
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 3 (aggregate at AnalyzeSpark.java:624) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 4(aggregate at AnalyzeSpark.java:624)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 4 (MapPartitionsRDD[12] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5312) called with curMem=150092, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_5 stored as values in memory (estimated size 5.2 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3022) called with curMem=155404, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.9 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 983.0 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 4 (MapPartitionsRDD[12] at map at AnalyzeSpark.java:659)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 4.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 4.0 (TID 8, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 4.0 (TID 9, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 4.0 (TID 8)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 4.0 (TID 9)
16/08/03 21:25:23 INFO o.a.s.CacheManager: Partition rdd_2_1 not found, computing it
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:30111+30111
16/08/03 21:25:23 INFO o.a.s.CacheManager: Partition rdd_2_0 not found, computing it
16/08/03 21:25:23 INFO o.a.s.r.HadoopRDD: Input split: file:/Users/danlin/git/dl4j-lab/src/main/resources/train_mod.csv:0+30111
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(282600) called with curMem=158426, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block rdd_2_0 stored as values in memory (estimated size 276.0 KB, free 982.6 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added rdd_2_0 in memory on localhost:50602 (size: 276.0 KB, free: 982.8 MB)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 4.0 (TID 8). 2763 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 4.0 (TID 8) in 419 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(281552) called with curMem=441026, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block rdd_2_1 stored as values in memory (estimated size 275.0 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added rdd_2_1 in memory on localhost:50602 (size: 275.0 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 4.0 (TID 9). 2763 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 4.0 (TID 9) in 435 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 4 (aggregate at AnalyzeSpark.java:624) finished in 0,437 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 3 finished: aggregate at AnalyzeSpark.java:624, took 0,455022 s
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:633
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 4 (aggregate at AnalyzeSpark.java:633) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 5(aggregate at AnalyzeSpark.java:633)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[13] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5488) called with curMem=722578, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_6 stored as values in memory (estimated size 5.4 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3132) called with curMem=728066, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 3.1 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:50602 (size: 3.1 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 5 (MapPartitionsRDD[13] at map at AnalyzeSpark.java:659)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 5.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 5.0 (TID 10, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 5.0 (TID 11, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 5.0 (TID 10)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 5.0 (TID 11)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 5.0 (TID 10). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 5.0 (TID 10) in 14 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 5.0 (TID 11). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 5.0 (TID 11) in 16 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 5 (aggregate at AnalyzeSpark.java:633) finished in 0,020 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 4 finished: aggregate at AnalyzeSpark.java:633, took 0,039036 s
16/08/03 21:25:23 INFO o.a.s.ContextCleaner: Cleaned accumulator 4
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:633
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 5 (aggregate at AnalyzeSpark.java:633) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 6(aggregate at AnalyzeSpark.java:633)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 6 (MapPartitionsRDD[14] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5496) called with curMem=731198, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_7 stored as values in memory (estimated size 5.4 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3143) called with curMem=736694, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 3.1 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:50602 (size: 3.1 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 6 (MapPartitionsRDD[14] at map at AnalyzeSpark.java:659)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 6.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 6.0 (TID 12, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 6.0 (TID 13, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 6.0 (TID 12)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 6.0 (TID 13)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_6_piece0 on localhost:50602 in memory (size: 3.1 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 6.0 (TID 12). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 6.0 (TID 12) in 12 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 6.0 (TID 13). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.ContextCleaner: Cleaned accumulator 6
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 6.0 (TID 13) in 14 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 6 (aggregate at AnalyzeSpark.java:633) finished in 0,015 s
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_5_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 5 finished: aggregate at AnalyzeSpark.java:633, took 0,031377 s
16/08/03 21:25:23 INFO o.a.s.ContextCleaner: Cleaned accumulator 5
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_4_piece0 on localhost:50602 in memory (size: 2.9 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_3_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.ContextCleaner: Cleaned accumulator 3
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_2_piece0 on localhost:50602 in memory (size: 1328.0 B, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.ContextCleaner: Cleaned accumulator 2
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: count at AnalyzeSpark.java:617
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Registering RDD 16 (distinct at AnalyzeSpark.java:617)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 6 (count at AnalyzeSpark.java:617) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 8(count at AnalyzeSpark.java:617)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List(ShuffleMapStage 7)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List(ShuffleMapStage 7)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ShuffleMapStage 7 (MapPartitionsRDD[16] at distinct at AnalyzeSpark.java:617), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5168) called with curMem=702405, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_8 stored as values in memory (estimated size 5.0 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3004) called with curMem=707573, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 2.9 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_8_piece0 in memory on localhost:50602 (size: 2.9 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 7 (MapPartitionsRDD[16] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 7.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 7.0 (TID 14, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 7.0 (TID 15, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 7.0 (TID 14)
16/08/03 21:25:23 INFO o.a.s.CacheManager: Partition rdd_15_0 not found, computing it
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 7.0 (TID 15)
16/08/03 21:25:23 INFO o.a.s.CacheManager: Partition rdd_15_1 not found, computing it
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(32080) called with curMem=710577, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block rdd_15_0 stored as values in memory (estimated size 31.3 KB, free 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added rdd_15_0 in memory on localhost:50602 (size: 31.3 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(31448) called with curMem=742657, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block rdd_15_1 stored as values in memory (estimated size 30.7 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added rdd_15_1 in memory on localhost:50602 (size: 30.7 KB, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 7.0 (TID 14). 2753 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 7.0 (TID 15). 2753 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 7.0 (TID 14) in 98 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 7.0 (TID 15) in 98 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ShuffleMapStage 7 (distinct at AnalyzeSpark.java:617) finished in 0,099 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: looking for newly runnable stages
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: running: Set()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: waiting: Set(ResultStage 8)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: failed: Set()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents for ResultStage 8: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 8 (MapPartitionsRDD[18] at distinct at AnalyzeSpark.java:617), which is now runnable
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2880) called with curMem=774105, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_9 stored as values in memory (estimated size 2.8 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(1690) called with curMem=776985, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_9_piece0 stored as bytes in memory (estimated size 1690.0 B, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_9_piece0 in memory on localhost:50602 (size: 1690.0 B, free: 982.5 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 9 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 8 (MapPartitionsRDD[18] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 8.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 8.0 (TID 16, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 8.0 (TID 17, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 8.0 (TID 16)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 8.0 (TID 17)
16/08/03 21:25:23 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:23 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
16/08/03 21:25:23 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:23 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 8.0 (TID 17). 1203 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 8.0 (TID 17) in 42 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 8.0 (TID 16). 1203 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 8.0 (TID 16) in 44 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 8 (count at AnalyzeSpark.java:617) finished in 0,045 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 6 finished: count at AnalyzeSpark.java:617, took 0,163739 s
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:620
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 7 (aggregate at AnalyzeSpark.java:620) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 9(aggregate at AnalyzeSpark.java:620)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 9 (MapPartitionsRDD[15] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5432) called with curMem=778675, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_10 stored as values in memory (estimated size 5.3 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3109) called with curMem=784107, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_10_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_10_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 10 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 9 (MapPartitionsRDD[15] at map at AnalyzeSpark.java:659)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 9.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 9.0 (TID 18, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 9.0 (TID 19, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 9.0 (TID 19)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_15_1 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 9.0 (TID 19). 2392 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 9.0 (TID 19) in 16 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 9.0 (TID 18)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_15_0 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 9.0 (TID 18). 2392 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 9.0 (TID 18) in 31 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 9 (aggregate at AnalyzeSpark.java:620) finished in 0,033 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 7 finished: aggregate at AnalyzeSpark.java:620, took 0,042328 s
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:633
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 8 (aggregate at AnalyzeSpark.java:633) with 2 output partitions
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 10(aggregate at AnalyzeSpark.java:633)
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 10 (MapPartitionsRDD[19] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5496) called with curMem=787216, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_11 stored as values in memory (estimated size 5.4 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3142) called with curMem=792712, maxMem=1030823608
16/08/03 21:25:23 INFO o.a.s.s.MemoryStore: Block broadcast_11_piece0 stored as bytes in memory (estimated size 3.1 KB, free 982.3 MB)
16/08/03 21:25:23 INFO o.a.s.s.BlockManagerInfo: Added broadcast_11_piece0 in memory on localhost:50602 (size: 3.1 KB, free: 982.4 MB)
16/08/03 21:25:23 INFO o.a.s.SparkContext: Created broadcast 11 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 10 (MapPartitionsRDD[19] at map at AnalyzeSpark.java:659)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 10.0 with 2 tasks
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 10.0 (TID 20, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 10.0 (TID 21, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 1.0 in stage 10.0 (TID 21)
16/08/03 21:25:23 INFO o.a.s.e.Executor: Running task 0.0 in stage 10.0 (TID 20)
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:23 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 1.0 in stage 10.0 (TID 21). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.e.Executor: Finished task 0.0 in stage 10.0 (TID 20). 2239 bytes result sent to driver
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 10.0 (TID 21) in 5 ms on localhost (1/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 10.0 (TID 20) in 8 ms on localhost (2/2)
16/08/03 21:25:23 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 10.0, whose tasks have all completed, from pool
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: ResultStage 10 (aggregate at AnalyzeSpark.java:633) finished in 0,008 s
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Job 8 finished: aggregate at AnalyzeSpark.java:633, took 0,017659 s
16/08/03 21:25:23 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:630
16/08/03 21:25:23 INFO o.a.s.s.DAGScheduler: Got job 9 (aggregate at AnalyzeSpark.java:630) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 11(aggregate at AnalyzeSpark.java:630)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 11 (MapPartitionsRDD[20] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5336) called with curMem=795854, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_12 stored as values in memory (estimated size 5.2 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3048) called with curMem=801190, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_12_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_12_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 12 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 11 (MapPartitionsRDD[20] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 11.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 11.0 (TID 22, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 11.0 (TID 23, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 11.0 (TID 23)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 11.0 (TID 22)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 11.0 (TID 23). 2303 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 11.0 (TID 23) in 10 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 11.0 (TID 22). 2303 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 11.0 (TID 22) in 14 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 11.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 11 (aggregate at AnalyzeSpark.java:630) finished in 0,014 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 9 finished: aggregate at AnalyzeSpark.java:630, took 0,024081 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:624
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 10 (aggregate at AnalyzeSpark.java:624) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 12(aggregate at AnalyzeSpark.java:624)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 12 (MapPartitionsRDD[21] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5312) called with curMem=804238, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_13 stored as values in memory (estimated size 5.2 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3026) called with curMem=809550, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_13_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_13_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 13 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 12 (MapPartitionsRDD[21] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 12.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 12.0 (TID 24, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 12.0 (TID 25, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 12.0 (TID 24)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 12.0 (TID 25)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 12.0 (TID 25). 2264 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 12.0 (TID 24). 2264 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 12.0 (TID 25) in 7 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 12.0 (TID 24) in 7 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 12.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 12 (aggregate at AnalyzeSpark.java:624) finished in 0,009 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 10 finished: aggregate at AnalyzeSpark.java:624, took 0,017409 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:624
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 11 (aggregate at AnalyzeSpark.java:624) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 13(aggregate at AnalyzeSpark.java:624)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 13 (MapPartitionsRDD[22] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5312) called with curMem=812576, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_14 stored as values in memory (estimated size 5.2 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3026) called with curMem=817888, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_14_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_14_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 14 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 13 (MapPartitionsRDD[22] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 13.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 13.0 (TID 26, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 13.0 (TID 27, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 13.0 (TID 27)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 13.0 (TID 26)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 13.0 (TID 26). 2264 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 13.0 (TID 27). 2264 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 13.0 (TID 26) in 7 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 13.0 (TID 27) in 8 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 13.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 13 (aggregate at AnalyzeSpark.java:624) finished in 0,009 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 11 finished: aggregate at AnalyzeSpark.java:624, took 0,017168 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: count at AnalyzeSpark.java:617
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Registering RDD 24 (distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 12 (count at AnalyzeSpark.java:617) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 15(count at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List(ShuffleMapStage 14)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List(ShuffleMapStage 14)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ShuffleMapStage 14 (MapPartitionsRDD[24] at distinct at AnalyzeSpark.java:617), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5168) called with curMem=820914, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_15 stored as values in memory (estimated size 5.0 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3004) called with curMem=826082, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_15_piece0 stored as bytes in memory (estimated size 2.9 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_15_piece0 in memory on localhost:50602 (size: 2.9 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 15 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 14 (MapPartitionsRDD[24] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 14.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 14.0 (TID 28, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 14.0 (TID 29, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 14.0 (TID 29)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 14.0 (TID 28)
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_23_0 not found, computing it
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_23_1 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(21448) called with curMem=829086, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_23_1 stored as values in memory (estimated size 20.9 KB, free 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_23_1 in memory on localhost:50602 (size: 20.9 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(21680) called with curMem=850534, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_23_0 stored as values in memory (estimated size 21.2 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_23_0 in memory on localhost:50602 (size: 21.2 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 14.0 (TID 29). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 14.0 (TID 28). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 14.0 (TID 29) in 27 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 14.0 (TID 28) in 28 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 14.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ShuffleMapStage 14 (distinct at AnalyzeSpark.java:617) finished in 0,030 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: looking for newly runnable stages
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: running: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: waiting: Set(ResultStage 15)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: failed: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents for ResultStage 15: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 15 (MapPartitionsRDD[26] at distinct at AnalyzeSpark.java:617), which is now runnable
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2880) called with curMem=872214, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_16 stored as values in memory (estimated size 2.8 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(1688) called with curMem=875094, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_16_piece0 stored as bytes in memory (estimated size 1688.0 B, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_16_piece0 in memory on localhost:50602 (size: 1688.0 B, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 16 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 15 (MapPartitionsRDD[26] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 15.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 15.0 (TID 30, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 15.0 (TID 31, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 15.0 (TID 30)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 15.0 (TID 31)
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 15.0 (TID 30). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 15.0 (TID 30) in 22 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 15.0 (TID 31). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 15.0 (TID 31) in 23 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 15.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 15 (count at AnalyzeSpark.java:617) finished in 0,024 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 12 finished: count at AnalyzeSpark.java:617, took 0,071391 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:620
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 13 (aggregate at AnalyzeSpark.java:620) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 16(aggregate at AnalyzeSpark.java:620)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 16 (MapPartitionsRDD[23] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5432) called with curMem=876782, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_17 stored as values in memory (estimated size 5.3 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3109) called with curMem=882214, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_17_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_17_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 17 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 16 (MapPartitionsRDD[23] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 16.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 16.0 (TID 32, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 16.0 (TID 33, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 16.0 (TID 33)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 16.0 (TID 32)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_23_1 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_23_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 16.0 (TID 33). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 16.0 (TID 32). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 16.0 (TID 33) in 7 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 16.0 (TID 32) in 9 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 16.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 16 (aggregate at AnalyzeSpark.java:620) finished in 0,009 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 13 finished: aggregate at AnalyzeSpark.java:620, took 0,018106 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: count at AnalyzeSpark.java:617
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Registering RDD 28 (distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 14 (count at AnalyzeSpark.java:617) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 18(count at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List(ShuffleMapStage 17)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List(ShuffleMapStage 17)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ShuffleMapStage 17 (MapPartitionsRDD[28] at distinct at AnalyzeSpark.java:617), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5168) called with curMem=885323, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_18 stored as values in memory (estimated size 5.0 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3005) called with curMem=890491, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_18_piece0 stored as bytes in memory (estimated size 2.9 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_18_piece0 in memory on localhost:50602 (size: 2.9 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 18 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 17 (MapPartitionsRDD[28] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 17.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 17.0 (TID 34, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 17.0 (TID 35, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 17.0 (TID 34)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 17.0 (TID 35)
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_27_0 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_27_1 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(21008) called with curMem=893496, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_27_1 stored as values in memory (estimated size 20.5 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_27_1 in memory on localhost:50602 (size: 20.5 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(20992) called with curMem=914504, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_27_0 stored as values in memory (estimated size 20.5 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_27_0 in memory on localhost:50602 (size: 20.5 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 17.0 (TID 35). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 17.0 (TID 35) in 63 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 17.0 (TID 34). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 17.0 (TID 34) in 80 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 17.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ShuffleMapStage 17 (distinct at AnalyzeSpark.java:617) finished in 0,082 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: looking for newly runnable stages
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: running: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: waiting: Set(ResultStage 18)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: failed: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents for ResultStage 18: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 18 (MapPartitionsRDD[30] at distinct at AnalyzeSpark.java:617), which is now runnable
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2880) called with curMem=935496, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_19 stored as values in memory (estimated size 2.8 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(1692) called with curMem=938376, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_19_piece0 stored as bytes in memory (estimated size 1692.0 B, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_19_piece0 in memory on localhost:50602 (size: 1692.0 B, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 19 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 18 (MapPartitionsRDD[30] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 18.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 18.0 (TID 36, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 18.0 (TID 37, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 18.0 (TID 37)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 18.0 (TID 36)
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 18.0 (TID 37). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 18.0 (TID 37) in 25 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 18.0 (TID 36). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 18.0 (TID 36) in 28 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 18.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 18 (count at AnalyzeSpark.java:617) finished in 0,029 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 14 finished: count at AnalyzeSpark.java:617, took 0,164778 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:620
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 15 (aggregate at AnalyzeSpark.java:620) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 19(aggregate at AnalyzeSpark.java:620)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 19 (MapPartitionsRDD[27] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5432) called with curMem=940068, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_20 stored as values in memory (estimated size 5.3 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3109) called with curMem=945500, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_20_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_20_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 20 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 19 (MapPartitionsRDD[27] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 19.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 19.0 (TID 38, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 19.0 (TID 39, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 19.0 (TID 39)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 19.0 (TID 38)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_27_1 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 19.0 (TID 39). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 19.0 (TID 39) in 9 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_27_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 19.0 (TID 38). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 19.0 (TID 38) in 15 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 19 (aggregate at AnalyzeSpark.java:620) finished in 0,016 s
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 19.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 15 finished: aggregate at AnalyzeSpark.java:620, took 0,026930 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: count at AnalyzeSpark.java:617
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Registering RDD 32 (distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 16 (count at AnalyzeSpark.java:617) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 21(count at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List(ShuffleMapStage 20)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List(ShuffleMapStage 20)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ShuffleMapStage 20 (MapPartitionsRDD[32] at distinct at AnalyzeSpark.java:617), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5168) called with curMem=948609, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_21 stored as values in memory (estimated size 5.0 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3004) called with curMem=953777, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_21_piece0 stored as bytes in memory (estimated size 2.9 KB, free 982.2 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_21_piece0 in memory on localhost:50602 (size: 2.9 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 21 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ShuffleMapStage 20 (MapPartitionsRDD[32] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 20.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 20.0 (TID 40, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 20.0 (TID 41, localhost, PROCESS_LOCAL, 2081 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 20.0 (TID 40)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 20.0 (TID 41)
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_31_0 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_31_1 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(18592) called with curMem=956781, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_31_0 stored as values in memory (estimated size 18.2 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(18480) called with curMem=975373, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_31_1 stored as values in memory (estimated size 18.0 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_31_0 in memory on localhost:50602 (size: 18.2 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_31_1 in memory on localhost:50602 (size: 18.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 20.0 (TID 40). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 20.0 (TID 40) in 29 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 20.0 (TID 41). 2753 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 20.0 (TID 41) in 32 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 20.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ShuffleMapStage 20 (distinct at AnalyzeSpark.java:617) finished in 0,032 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: looking for newly runnable stages
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: running: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: waiting: Set(ResultStage 21)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: failed: Set()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents for ResultStage 21: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 21 (MapPartitionsRDD[34] at distinct at AnalyzeSpark.java:617), which is now runnable
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(2880) called with curMem=993853, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_22 stored as values in memory (estimated size 2.8 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(1691) called with curMem=996733, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_22_piece0 stored as bytes in memory (estimated size 1691.0 B, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_22_piece0 in memory on localhost:50602 (size: 1691.0 B, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 22 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 21 (MapPartitionsRDD[34] at distinct at AnalyzeSpark.java:617)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 21.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 21.0 (TID 42, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 21.0 (TID 43, localhost, PROCESS_LOCAL, 1820 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 21.0 (TID 43)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 21.0 (TID 42)
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Getting 2 non-empty blocks out of 2 blocks
16/08/03 21:25:24 INFO o.a.s.s.ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 21.0 (TID 42). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 21.0 (TID 42) in 19 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 21.0 (TID 43). 1203 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 21.0 (TID 43) in 22 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 21.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 21 (count at AnalyzeSpark.java:617) finished in 0,024 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 16 finished: count at AnalyzeSpark.java:617, took 0,087752 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:620
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 17 (aggregate at AnalyzeSpark.java:620) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 22(aggregate at AnalyzeSpark.java:620)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 22 (MapPartitionsRDD[31] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5432) called with curMem=998424, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_23 stored as values in memory (estimated size 5.3 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3109) called with curMem=1003856, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_23_piece0 stored as bytes in memory (estimated size 3.0 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_23_piece0 in memory on localhost:50602 (size: 3.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 23 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 22 (MapPartitionsRDD[31] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 22.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 22.0 (TID 44, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 22.0 (TID 45, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 22.0 (TID 45)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 22.0 (TID 44)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_31_1 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_31_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 22.0 (TID 44). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 22.0 (TID 45). 2392 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 22.0 (TID 44) in 8 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 22.0 (TID 45) in 8 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 22.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 22 (aggregate at AnalyzeSpark.java:620) finished in 0,009 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 17 finished: aggregate at AnalyzeSpark.java:620, took 0,022002 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:633
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 18 (aggregate at AnalyzeSpark.java:633) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 23(aggregate at AnalyzeSpark.java:633)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 23 (MapPartitionsRDD[35] at map at AnalyzeSpark.java:659), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(5496) called with curMem=1006965, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_24 stored as values in memory (estimated size 5.4 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(3146) called with curMem=1012461, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_24_piece0 stored as bytes in memory (estimated size 3.1 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_24_piece0 in memory on localhost:50602 (size: 3.1 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 24 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 23 (MapPartitionsRDD[35] at map at AnalyzeSpark.java:659)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 23.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 23.0 (TID 46, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 23.0 (TID 47, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 23.0 (TID 47)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 23.0 (TID 46)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 23.0 (TID 46). 2239 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 23.0 (TID 46) in 6 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 23.0 (TID 47). 2239 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 23.0 (TID 47) in 8 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 23.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 23 (aggregate at AnalyzeSpark.java:633) finished in 0,009 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 18 finished: aggregate at AnalyzeSpark.java:633, took 0,019992 s
----- Data Quality -----
idx name type quality details
0 "PassengerId" Integer ok IntegerQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countNonInteger=0)
1 "Survived" Categorical ok CategoricalQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891)
2 "Pclass" Categorical ok CategoricalQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891)
3 "Name" String ok StringQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countEmptyString=0, countAlphabetic=0, countNumerical=0, countWordCharacter=0, countWhitespace=0, countUnique=891)
4 "Sex" Categorical ok CategoricalQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891)
5 "Age" Double FAIL DoubleQuality(countValid=714, countInvalid=0, countMissing=177, countTotal=891, countNonReal=177, countNaN=0, countInfinite=0)
6 "SibSp" Integer ok IntegerQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countNonInteger=0)
7 "Parch" Integer ok IntegerQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countNonInteger=0)
8 "Ticket" String ok StringQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countEmptyString=0, countAlphabetic=0, countNumerical=661, countWordCharacter=665, countWhitespace=0, countUnique=681)
9 "Fare" String ok StringQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countEmptyString=0, countAlphabetic=0, countNumerical=161, countWordCharacter=161, countWhitespace=0, countUnique=248)
10 "Cabin" String ok StringQuality(countValid=891, countInvalid=0, countMissing=0, countTotal=891, countEmptyString=687, countAlphabetic=4, countNumerical=0, countWordCharacter=180, countWhitespace=0, countUnique=148)
11 "Embarked" Categorical FAIL CategoricalQuality(countValid=889, countInvalid=0, countMissing=2, countTotal=891)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: count at Analysis.java:134
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 19 (count at Analysis.java:134) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 24(count at Analysis.java:134)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 24 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(7088) called with curMem=1015607, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_25 stored as values in memory (estimated size 6.9 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(4029) called with curMem=1022695, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_25_piece0 stored as bytes in memory (estimated size 3.9 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_25_piece0 in memory on localhost:50602 (size: 3.9 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 25 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_15_piece0 on localhost:50602 in memory (size: 2.9 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 24 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 24.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 14
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_13_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 24.0 (TID 48, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 24.0 (TID 49, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 13
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 24.0 (TID 48)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 24.0 (TID 49)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_12_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 12
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_11_piece0 on localhost:50602 in memory (size: 3.1 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 11
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_10_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_37_0 not found, computing it
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 10
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_0 locally
16/08/03 21:25:24 INFO o.a.s.CacheManager: Partition rdd_37_1 not found, computing it
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_2_1 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_9_piece0 on localhost:50602 in memory (size: 1690.0 B, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 9
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_8_piece0 on localhost:50602 in memory (size: 2.9 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 8
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned shuffle 1
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Removing RDD 15
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned RDD 15
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_7_piece0 on localhost:50602 in memory (size: 3.1 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 7
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_24_piece0 on localhost:50602 in memory (size: 3.1 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 24
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_23_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 23
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_22_piece0 on localhost:50602 in memory (size: 1691.0 B, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 22
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_21_piece0 on localhost:50602 in memory (size: 2.9 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 21
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned shuffle 4
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Removing RDD 31
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned RDD 31
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_20_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 20
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_19_piece0 on localhost:50602 in memory (size: 1692.0 B, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 19
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_18_piece0 on localhost:50602 in memory (size: 2.9 KB, free: 982.4 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 18
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned shuffle 3
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Removing RDD 27
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned RDD 27
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_17_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.5 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 17
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_16_piece0 on localhost:50602 in memory (size: 1688.0 B, free: 982.5 MB)
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 16
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned accumulator 15
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned shuffle 2
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Removing RDD 23
16/08/03 21:25:24 INFO o.a.s.ContextCleaner: Cleaned RDD 23
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Removed broadcast_14_piece0 on localhost:50602 in memory (size: 3.0 KB, free: 982.5 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(273528) called with curMem=704883, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_37_0 stored as values in memory (estimated size 267.1 KB, free 982.1 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_37_0 in memory on localhost:50602 (size: 267.1 KB, free: 982.3 MB)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 24.0 (TID 48). 2581 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 24.0 (TID 48) in 90 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(271944) called with curMem=978411, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block rdd_37_1 stored as values in memory (estimated size 265.6 KB, free 981.9 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added rdd_37_1 in memory on localhost:50602 (size: 265.6 KB, free: 982.0 MB)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 24.0 (TID 49). 2581 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 24.0 (TID 49) in 98 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 24 (count at Analysis.java:134) finished in 0,099 s
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 24.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 19 finished: count at Analysis.java:134, took 0,121251 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: take at Analysis.java:135
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 20 (take at Analysis.java:135) with 1 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 25(take at Analysis.java:135)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 25 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(7256) called with curMem=1250355, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_26 stored as values in memory (estimated size 7.1 KB, free 981.9 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(4107) called with curMem=1257611, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_26_piece0 stored as bytes in memory (estimated size 4.0 KB, free 981.9 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_26_piece0 in memory on localhost:50602 (size: 4.0 KB, free: 982.0 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 26 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 1 missing tasks from ResultStage 25 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 25.0 with 1 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 25.0 (TID 50, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 25.0 (TID 50)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_37_0 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 25.0 (TID 50). 5432 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 25.0 (TID 50) in 7 ms on localhost (1/1)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 25.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 25 (take at Analysis.java:135) finished in 0,007 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 20 finished: take at Analysis.java:135, took 0,016261 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:361
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 21 (aggregate at AnalyzeSpark.java:361) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 26(aggregate at AnalyzeSpark.java:361)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 26 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(8024) called with curMem=1261718, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_27 stored as values in memory (estimated size 7.8 KB, free 981.9 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(4488) called with curMem=1269742, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_27_piece0 stored as bytes in memory (estimated size 4.4 KB, free 981.9 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_27_piece0 in memory on localhost:50602 (size: 4.4 KB, free: 982.0 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 27 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 26 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 26.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 26.0 (TID 51, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 26.0 (TID 52, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 26.0 (TID 51)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 26.0 (TID 52)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_37_0 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_37_1 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 26.0 (TID 51). 3723 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 26.0 (TID 52). 3723 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 26.0 (TID 51) in 29 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 26.0 (TID 52) in 28 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 26.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 26 (aggregate at AnalyzeSpark.java:361) finished in 0,030 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 21 finished: aggregate at AnalyzeSpark.java:361, took 0,039593 s
16/08/03 21:25:24 INFO o.a.s.SparkContext: Starting job: aggregate at AnalyzeSpark.java:495
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Got job 22 (aggregate at AnalyzeSpark.java:495) with 2 output partitions
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Final stage: ResultStage 27(aggregate at AnalyzeSpark.java:495)
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Parents of final stage: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Missing parents: List()
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting ResultStage 27 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110), which has no missing parents
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(8400) called with curMem=1274230, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_28 stored as values in memory (estimated size 8.2 KB, free 981.8 MB)
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: ensureFreeSpace(4625) called with curMem=1282630, maxMem=1030823608
16/08/03 21:25:24 INFO o.a.s.s.MemoryStore: Block broadcast_28_piece0 stored as bytes in memory (estimated size 4.5 KB, free 981.8 MB)
16/08/03 21:25:24 INFO o.a.s.s.BlockManagerInfo: Added broadcast_28_piece0 in memory on localhost:50602 (size: 4.5 KB, free: 982.0 MB)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Created broadcast 28 from broadcast at DAGScheduler.scala:861
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Submitting 2 missing tasks from ResultStage 27 (MapPartitionsRDD[37] at map at SparkTransformExecutor.java:110)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Adding task set 27.0 with 2 tasks
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 0.0 in stage 27.0 (TID 53, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Starting task 1.0 in stage 27.0 (TID 54, localhost, PROCESS_LOCAL, 2092 bytes)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 0.0 in stage 27.0 (TID 53)
16/08/03 21:25:24 INFO o.a.s.e.Executor: Running task 1.0 in stage 27.0 (TID 54)
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_37_0 locally
16/08/03 21:25:24 INFO o.a.s.s.BlockManager: Found block rdd_37_1 locally
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 0.0 in stage 27.0 (TID 53). 7158 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.e.Executor: Finished task 1.0 in stage 27.0 (TID 54). 7158 bytes result sent to driver
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 0.0 in stage 27.0 (TID 53) in 20 ms on localhost (1/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSetManager: Finished task 1.0 in stage 27.0 (TID 54) in 21 ms on localhost (2/2)
16/08/03 21:25:24 INFO o.a.s.s.TaskSchedulerImpl: Removed TaskSet 27.0, whose tasks have all completed, from pool
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: ResultStage 27 (aggregate at AnalyzeSpark.java:495) finished in 0,023 s
16/08/03 21:25:24 INFO o.a.s.s.DAGScheduler: Job 22 failed: aggregate at AnalyzeSpark.java:495, took 0,036230 s
Exception in thread "main" org.apache.spark.SparkDriverExecutionException: Execution error
at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1024)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1942)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1.apply(RDD.scala:1078)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.aggregate(RDD.scala:1071)
at org.apache.spark.api.java.JavaRDDLike$class.aggregate(JavaRDDLike.scala:416)
at org.apache.spark.api.java.AbstractJavaRDDLike.aggregate(JavaRDDLike.scala:47)
at org.datavec.spark.transform.AnalyzeSpark.analyze(AnalyzeSpark.java:495)
at org.datavec.spark.transform.AnalyzeSpark.analyze(AnalyzeSpark.java:123)
at kaggle.Analysis.main(Analysis.java:137)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Caused by: java.lang.NullPointerException
at org.datavec.spark.transform.analysis.histogram.HistogramCombineFunction.call(HistogramCombineFunction.java:38)
at org.datavec.spark.transform.analysis.histogram.HistogramCombineFunction.call(HistogramCombineFunction.java:28)
at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction2$1.apply(JavaPairRDD.scala:1024)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1$$anonfun$22.apply(RDD.scala:1077)
at org.apache.spark.rdd.RDD$$anonfun$aggregate$1$$anonfun$22.apply(RDD.scala:1077)
at org.apache.spark.scheduler.JobWaiter.taskSucceeded(JobWaiter.scala:56)
at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1020)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1493)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
16/08/03 21:25:24 INFO o.a.s.SparkContext: Invoking stop() from shutdown hook
16/08/03 21:25:25 INFO o.a.s.u.SparkUI: Stopped Spark web UI at http://192.168.0.5:4040
16/08/03 21:25:25 INFO o.a.s.s.DAGScheduler: Stopping DAGScheduler
16/08/03 21:25:25 INFO o.a.s.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
16/08/03 21:25:25 INFO o.a.s.s.MemoryStore: MemoryStore cleared
16/08/03 21:25:25 INFO o.a.s.s.BlockManager: BlockManager stopped
16/08/03 21:25:25 INFO o.a.s.s.BlockManagerMaster: BlockManagerMaster stopped
16/08/03 21:25:25 INFO o.a.s.s.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
16/08/03 21:25:25 INFO o.a.s.SparkContext: Successfully stopped SparkContext
16/08/03 21:25:25 INFO o.a.s.u.ShutdownHookManager: Shutdown hook called
16/08/03 21:25:25 INFO o.a.s.u.ShutdownHookManager: Deleting directory /private/var/folders/tk/2t_ll0bs5ml458g35b118h3r0000gp/T/spark-2a892f8e-3eb9-4a60-ba0c-fb661129ba9e
Process finished with exit code 1
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.danlin</groupId>
<artifactId>dl4j</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Artifact id of the ND4J backend; consumed by the ${nd4j.backend} dependency, whose version is pinned in dependencyManagement. -->
<nd4j.backend>nd4j-native</nd4j.backend>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Classifier used for the attached shaded jar (see the maven-shade-plugin configuration). -->
<shadedClassifier>bin</shadedClassifier>
<!-- Aligned with the 1.8 source/target level configured on maven-compiler-plugin; the previous value (1.7) contradicted it. -->
<java.version>1.8</java.version>
<nd4j.version>0.5.0</nd4j.version>
<dl4j.version>0.5.0</dl4j.version>
<canova.version>0.0.0.17</canova.version>
<arbiter.version>0.5.0</arbiter.version>
<guava.version>19.0</guava.version>
<!-- Only referenced by the commented-out jfreechart dependency. -->
<jfreechart.version>1.0.13</jfreechart.version>
<maven-shade-plugin.version>2.4.3</maven-shade-plugin.version>
<exec-maven-plugin.version>1.4.0</exec-maven-plugin.version>
<datavec.version>0.5.0</datavec.version>
<!-- Scala binary suffix appended to Scala cross-built artifact ids such as datavec-spark. -->
<scala.binary.version>2.10</scala.binary.version>
</properties>
<!-- Version pins only: entries here are not added to the classpath unless they are also
     declared (or pulled in transitively) under the dependencies section. -->
<dependencyManagement>
<dependencies>
<!-- Both ND4J backends are managed so that the ${nd4j.backend} dependency can name either
     one without repeating a version. -->
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-native</artifactId>
<version>${nd4j.version}</version>
</dependency>
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>nd4j-cuda-7.5</artifactId>
<version>${nd4j.version}</version>
</dependency>
<!-- Forces a single jackson-databind version for direct and transitive usages.
     NOTE(review): presumably pinned for compatibility with Spark; confirm the reason. -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.4.4</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Deeplearning4j modules, all on ${dl4j.version}. -->
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-nlp</artifactId>
<version>${dl4j.version}</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-core</artifactId>
<version>${dl4j.version}</version>
</dependency>
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>deeplearning4j-ui</artifactId>
<version>${dl4j.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<!-- ND4J backend selected through the nd4j.backend property; the version is supplied by
     dependencyManagement, which is why none is given here. -->
<dependency>
<groupId>org.nd4j</groupId>
<artifactId>${nd4j.backend}</artifactId>
</dependency>
<!-- Canova artifacts on ${canova.version}.
     NOTE(review): these sit alongside the DataVec artifacts declared further down;
     confirm both families are actually needed by the code. -->
<dependency>
<artifactId>canova-nd4j-image</artifactId>
<groupId>org.nd4j</groupId>
<version>${canova.version}</version>
</dependency>
<dependency>
<artifactId>canova-nd4j-codec</artifactId>
<groupId>org.nd4j</groupId>
<version>${canova.version}</version>
</dependency>
<!-- Used in the RegressionMathFunctions example -->
<!--<dependency>-->
<!--<groupId>jfree</groupId>-->
<!--<artifactId>jfreechart</artifactId>-->
<!--<version>${jfreechart.version}</version>-->
<!--</dependency>-->
<!-- Arbiter: used for hyperparameter optimization examples -->
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>arbiter-deeplearning4j</artifactId>
<version>${arbiter.version}</version>
</dependency>
<!-- DataVec core API and its Spark integration, both on ${datavec.version}. -->
<dependency>
<groupId>org.datavec</groupId>
<artifactId>datavec-api</artifactId>
<version>${datavec.version}</version>
</dependency>
<!-- The log4j binding and log4j itself are excluded from this dependency's transitive
     tree. NOTE(review): presumably to avoid clashing with another SLF4J binding; confirm. -->
<dependency>
<groupId>org.datavec</groupId>
<artifactId>datavec-spark_${scala.binary.version}</artifactId>
<version>${datavec.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Spark MLlib. The Scala suffix comes from scala.binary.version so it cannot drift from
     the suffix used by datavec-spark (it was previously hard-coded as _2.10).
     NOTE(review): verify that this Spark version matches the Spark release datavec-spark
     brings in transitively; different Spark versions on one classpath are not guaranteed
     to be binary compatible. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_${scala.binary.version}</artifactId>
<version>2.0.0</version>
<!-- Same logging exclusions as on datavec-spark. -->
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- exec-maven-plugin: declares the exec goal with "java" as the external executable.
     No phase, main class or arguments are configured in this block. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>${exec-maven-plugin.version}</version>
<executions>
<execution>
<goals>
<goal>exec</goal>
</goals>
</execution>
</executions>
<configuration>
<executable>java</executable>
</configuration>
</plugin>
<!-- maven-shade-plugin: builds a single jar containing the project and its dependencies
     during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${maven-shade-plugin.version}</version>
<configuration>
<!-- The shaded jar is attached as an extra artifact with the ${shadedClassifier}
     classifier instead of replacing the main jar. -->
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedClassifierName>${shadedClassifier}</shadedClassifierName>
<createDependencyReducedPom>true</createDependencyReducedPom>
<filters>
<!-- Applied to every artifact: drops org/datanucleus content and jar signature files
     (signatures of the original jars are no longer valid once contents are merged). -->
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<!-- Concatenates the reference.conf resources found across the merged jars. -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>reference.conf</resource>
</transformer>
<!-- Merges META-INF/services provider files. -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<!-- No manifest entries (e.g. a main class) are configured on this transformer.
     NOTE(review): add one if the shaded jar is meant to be launched with java -jar. -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<!-- maven-compiler-plugin: compiles at Java 1.8 source/target level. The levels are
     written literally here; the java.version property in the properties section is not
     referenced by this plugin. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment