Skip to content

Instantly share code, notes, and snippets.

Created December 11, 2017 06:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/1f2e382fdda002580154b5c43fbe9b3a to your computer and use it in GitHub Desktop.
Save anonymous/1f2e382fdda002580154b5c43fbe9b3a to your computer and use it in GitHub Desktop.
import com.tdunning.math.stats.TDigest;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * Batch job that loads a CSV data set with Spark and summarises the values of its
 * {@code Duration} column in a t-digest.
 */
public class BatchProcess {

    /** Location of the CSV input read by {@link #startProcess(String, String)}. */
    private static final String INPUT_PATH = "J:\\csv_path\\T_EN";

    /** Name of the column whose values are fed into the digest. */
    private static final String DURATION_COLUMN = "Duration";

    /** Compression argument handed to {@code TDigest.createDigest}. */
    private static final double DIGEST_COMPRESSION = 100;

    /**
     * Runs the job against a local Spark master.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String master = "local";
        String appName = "java_client";
        startProcess(master, appName);
    }

    /**
     * Reads the CSV input (with a header row and schema inference), extracts the
     * {@code Duration} column as integers, builds a single t-digest over all of the
     * values and prints the digest's size.
     *
     * <p>The digest is built with {@code aggregate} rather than by mutating a
     * driver-side object from inside {@code foreach}: Spark ships a serialized copy of
     * the closure to each task, so updates made there never reach the driver's
     * instance and the reported size would stay at zero.
     *
     * <p>As before, this requires {@code TDigest} to be serializable, because Spark
     * must ship the zero value to the tasks and the partial digests back to the driver.
     *
     * @param master  Spark master URL passed to the session builder
     * @param appName application name passed to the session builder
     */
    public static void startProcess(String master, String appName) {
        SparkSession session = SparkSession.builder().master(master).appName(appName).getOrCreate();
        try {
            Dataset<Row> dataSet = session.read()
                    .option("header", "true")
                    .option("inferSchema", "true")
                    .csv(INPUT_PATH);

            JavaRDD<Integer> durations =
                    dataSet.javaRDD().map(row -> row.<Integer>getAs(DURATION_COLUMN));

            TDigest totalDigest = durations.aggregate(
                    TDigest.createDigest(DIGEST_COMPRESSION),
                    // seqOp: fold one value into the partition-local digest.
                    (digest, value) -> {
                        System.out.println(value);
                        // Missing cells come back as null; skip them instead of
                        // failing on unboxing.
                        if (value != null) {
                            digest.add(value.doubleValue());
                        }
                        return digest;
                    },
                    // combOp: merge the per-partition digests into one result.
                    (left, right) -> {
                        left.add(right);
                        return left;
                    });

            System.out.println("tdigest size " + totalDigest.size());
        } finally {
            // Release the Spark context even if reading or aggregating fails.
            session.stop();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment