Skip to content

Instantly share code, notes, and snippets.

@ldacosta
Created February 15, 2017 03:56
Show Gist options
  • Save ldacosta/e85d3b96c76ddda101d2a2c3c3cab457 to your computer and use it in GitHub Desktop.
Save ldacosta/e85d3b96c76ddda101d2a2c3c3cab457 to your computer and use it in GitHub Desktop.
/**
 * Collapses `dsSEMUnionClean` to a single row per campaign grouping.
 *
 * Rows are grouped by the tuple
 * (account_descriptive_name, advertiser_id, advertiser_name, campaign_id, campaign_name, source).
 * Within each group the metric fields `clicks`, `conversions`,
 * `conversions_many_per_click` and `impressions` are summed.
 *
 * NOTE(review): the earlier draft of this method did not produce a result: it
 * ended on a `val`, destructured the 5-column aggregate as a 3-tuple, labelled
 * the impressions sum as "sum(depth)" and built an unused self-join. This version
 * reduces each group directly so the declared return type is honoured.
 *
 * NOTE(review): assumes `SEMUnionClean` is a case class (it relies on `copy`) and
 * that the four metric fields are plain numeric values supporting `+` -- confirm
 * against the class definition.
 *
 * NOTE(review): any field that is neither part of the grouping key nor one of the
 * four summed metrics (e.g. `assists`, which the earlier draft had commented out)
 * keeps the value of an arbitrary row of its group -- confirm this is acceptable.
 *
 * @param dsSEMUnionClean the rows to summarize
 * @return one row per distinct grouping key, carrying the summed metrics
 */
def summarize2(dsSEMUnionClean: Dataset[SEMUnionClean]): Dataset[SEMUnionClean] = {
  // Brings the encoders for the grouping-key tuple and for SEMUnionClean into scope.
  import dsSEMUnionClean.sparkSession.implicits._

  dsSEMUnionClean
    .groupByKey { r =>
      (r.account_descriptive_name, r.advertiser_id, r.advertiser_name, r.campaign_id, r.campaign_name, r.source)
    }
    .reduceGroups { (left, right) =>
      // Key fields are identical within a group, so only the metrics need merging.
      left.copy(
        clicks = left.clicks + right.clicks,
        conversions = left.conversions + right.conversions,
        conversions_many_per_click = left.conversions_many_per_click + right.conversions_many_per_click,
        impressions = left.impressions + right.impressions
      )
    }
    // reduceGroups yields (key, reducedRow); the key is already present on the row.
    .map(_._2)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment