Skip to content

Instantly share code, notes, and snippets.

@krishnanraman
Created July 26, 2014 00:01
Show Gist options
  • Save krishnanraman/739f4f753f77400abb71 to your computer and use it in GitHub Desktop.
Save krishnanraman/739f4f753f77400abb71 to your computer and use it in GitHub Desktop.
typed vs fields
import com.twitter.scalding._
import TDsl._
/**
 * Scalding job comparing the Typed API and the Fields API ("typed vs fields").
 *
 * For every id in `0 until n` it emits the pair `(id, mkRow(n, id))` and writes
 * the result to the `TypedTsv` sink named "typed". The value of `n` is read from
 * the `rows` job argument.
 *
 * Two alternative formulations are kept below as commented-out code, together
 * with the outcome the author observed for each.
 */
class mult(args: Args) extends Job(args) {

  /**
   * Builds a row of `columns` random doubles.
   *
   * The entry at index `dominant` is `5 + math.random * 10`; every other entry
   * is a plain `math.random` value.
   *
   * @param columns  number of entries in the row
   * @param dominant index of the entry that receives the larger value
   * @return the generated row
   */
  def mkRow(columns: Int, dominant: Int): Seq[Double] =
    Seq.tabulate[Double](columns) { col =>
      if (col != dominant) math.random
      else 5 + math.random * 10
    }

  // Row count, parsed from the "rows" argument; also used as the width of each row.
  val n: Int = args("rows").toInt

  /* Typed - FAILS :( OOM for n as small as 10,000
  TypedPipe.from(0 to n)
    .map { i => (i, mkRow(n, i)) }
    .write(TypedTsv[(Int, Seq[Double])]("typed"))
  */

  // Fields To Typed - WORKS
  // Start from a fields-based source, then switch to the typed API for the map.
  IterableSource((0 until n), 'a)
    .read
    .toTypedPipe[Int]('a)
    .map { rowId =>
      (rowId, mkRow(n, rowId))
    }
    .write(TypedTsv[(Int, Seq[Double])]("typed"))

  /* Fields API - WORKS !!!
  IterableSource((0 until n), 'a)
    .read
    .map('a -> 'b) { i: Int => mkRow(n, i) }
    .write(Tsv("fields"))
  */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment