Skip to content

Instantly share code, notes, and snippets.

@krishnanraman
Created February 12, 2013 01:54
Show Gist options
  • Save krishnanraman/4759444 to your computer and use it in GitHub Desktop.
Save krishnanraman/4759444 to your computer and use it in GitHub Desktop.
PropTest
/////////// INPUT PIPE = props.txt, Columns: date, displaylocation, engagements ///////////
2/1 1 10
2/1 1 20
2/1 2 30
2/1 2 10
2/1 3 20
2/1 4 10
2/2 1 10
2/2 2 20
2/2 2 15
2/2 3 20
2/2 3 25
2/2 4 10
///////// RESULT PIPE = resultprop (TSV), Columns: date, displayloc, proportion of that date's total engagements /////////
2/1 1 0.3
2/1 2 0.4
2/1 3 0.2
2/1 4 0.1
2/2 1 0.1
2/2 2 0.35
2/2 3 0.45
2/2 4 0.1
///// CODE
import com.twitter.scalding._
import cascading.pipe.Pipe
import cascading.pipe.joiner._
/**
 * Scalding job that computes, for every date, the share of that date's total
 * engagements contributed by each display location.
 *
 * Input: text lines of whitespace-separated `date displaylocation engagements`.
 * Output: TSV rows of `date`, `displayloc`, `proportion`.
 *
 * The input and output locations default to the original hard-coded
 * "props.txt" and "resultprop"; they can be overridden on the command line
 * with `--input <path>` and `--output <path>`.
 *
 * @param args job arguments; the optional keys `input` and `output` are read
 */
class PropTest(args : Args) extends Job(args) {

  /** One tuple per input record: ('date, 'displayloc, 'engagements), all still text. */
  val pipe = TextLine(args.getOrElse("input", "props.txt"))
    .read
    // Blank lines carry no record; drop them instead of failing on arr(1) below.
    .filter('line) { line: String => !line.trim.isEmpty }
    // mapTo keeps only the produced fields, so no separate project step is needed.
    .mapTo('line -> ('date, 'displayloc, 'engagements)) {
      line: String =>
        // "\\s+" after trim: runs of spaces/tabs and leading or trailing
        // whitespace must not create empty columns.
        val arr = line.trim.split("\\s+")
        (arr(0), arr(1), arr(2))
    }

  // ---- candidate for extraction into a reusable helper (author's note: "abstract below ??") ----

  /** Numerator: engagements summed per (date, display location). */
  val num = pipe.groupBy('date, 'displayloc) {
    g => g.sum('engagements -> 'engagements1)
  }

  /** Denominator: engagements summed per date. */
  val denom = pipe.groupBy('date) {
    g => g.sum('engagements -> 'engagements2)
  }

  /** Numerator joined to its date's denominator and reduced to a proportion. */
  val res = num.joinWithSmaller('date -> 'date, denom)
    .mapTo(('date, 'displayloc, 'engagements1, 'engagements2) -> ('date, 'displayloc, 'proportion)) {
      // displayloc is only a grouping key, so it stays a String (no numeric
      // coercion that would fail on non-numeric ids). The sums are read as
      // Double so the division is floating point without truncating first.
      tuple: (String, String, Double, Double) =>
        val (date, displayloc, part, total) = tuple
        // A date whose engagements sum to zero has no defined proportion;
        // emit 0.0 rather than writing NaN/Infinity into the result.
        val prop = if (total == 0.0d) 0.0d else part / total
        (date, displayloc, prop)
    }

  // ---- end of candidate helper ----

  res.write(Tsv(args.getOrElse("output", "resultprop")))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment