Skip to content

Instantly share code, notes, and snippets.

@sebge2emasphere
Created July 30, 2018 11:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebge2emasphere/610e9c4a94fa4208a8ee341cea326dfd to your computer and use it in GitHub Desktop.
Save sebge2emasphere/610e9c4a94fa4208a8ee341cea326dfd to your computer and use it in GitHub Desktop.
package com.emasphere.poc.parquetspark.sample;
import com.emasphere.poc.parquetspark.ParquetCsvImporter;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.functions;
import java.math.BigDecimal;
/**
 * Sample Spark SQL query: sums the {@code montant} column of the Parquet data stored at
 * {@link ParquetCsvImporter#LOCATION}, restricted to rows whose {@code code_vendeur}
 * contains {@code JVA} and whose {@code datefact} lies between 2018-01-01 and 2018-06-30
 * (both bounds inclusive).
 *
 * @author Sebastien Gerard
 */
public class Sample01 {

    /**
     * Runs the aggregation and returns the summed {@code montant}.
     *
     * <p>The query plan is printed through {@link Dataset#explain()} before the query is
     * executed, so calling this method produces diagnostic output as a side effect.
     *
     * @param context the Spark context used to obtain the {@link SQLContext}
     * @return the sum of {@code montant} over the matching rows, or {@link BigDecimal#ZERO}
     *         when the aggregate is SQL {@code NULL} (no matching row, or only null amounts);
     *         never {@code null}
     */
    public static BigDecimal compute(JavaSparkContext context) {
        final Dataset<Row> total = SQLContext
                .getOrCreate(context.sc())
                .read()
                .parquet(ParquetCsvImporter.LOCATION)
                // Only the columns needed by the filter and the aggregate are selected.
                .select("montant", "code_vendeur", "datefact")
                .filter(
                        "code_vendeur like '%JVA%' and datefact >= date '2018-01-01' and datefact <= date '2018-06-30'"
                )
                .agg(functions.sum("montant"));

        // Diagnostic output of the plan Spark will run for this query.
        total.explain();

        final Row result = total.first();

        // SUM over an empty (or all-null) input is SQL NULL; without this guard the method
        // would hand a Java null back to the caller.
        if (result.isNullAt(0)) {
            return BigDecimal.ZERO;
        }

        // NOTE(review): getDecimal assumes "montant" is a decimal column - confirm against
        // the schema written by ParquetCsvImporter.
        return result.getDecimal(0);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment