Skip to content

Instantly share code, notes, and snippets.

@pomadchin
Last active December 28, 2015 13:09
Show Gist options
  • Save pomadchin/7505737 to your computer and use it in GitHub Desktop.
Save pomadchin/7505737 to your computer and use it in GitHub Desktop.
Hadoop practice, a small converter for matrix.
/*
@daunnc, stackoverflow: http://stackoverflow.com/questions/20020967/transforming-matrix-format-scalding-solved
converts
1 2 3
3 4 5
5 6 7
to
1 1 1
1 2 2
1 3 3
2 1 3
2 2 4
2 3 5
3 1 5
3 2 6
3 3 7
*/
import com.twitter.scalding._
// Optionally import com.twitter.scalding.FunctionImplicits._ so closures can take separate arguments instead of a single tuple.
/**
 * Scalding job that rewrites a whitespace-separated matrix as
 * `(row, col, value)` triples and drops the zero entries.
 *
 * Job arguments:
 *  - `input`:  path of the text file holding one matrix row per line.
 *  - `output`: path of the TSV file that receives the `row`, `col`, `val` triples.
 *
 * NOTE(review): the row index is computed as `'offset + 1`, which assumes the
 * `'offset` field supplied by `TextLine` is a zero-based line index — confirm
 * this holds for the execution mode the job is run in.
 *
 * NOTE(review): the column index is tracked with mutable state (`prev`, `pos`)
 * that is updated from inside the `mapTo` closure, so it presumably requires
 * the values to be processed in file order by a single task — verify before
 * running on a cluster.
 */
class convertMatrixJob(args: Args) extends Job(args) {
  // Row index of the most recently processed value (0 before the first one).
  var prev: Long = 0
  // 1-based column index of the most recently processed value within its row.
  var pos: Long = 1
  // Entries whose text equals the string form of either constant are dropped.
  val zeroInt = 0
  val zeroDouble = 0.0

  TextLine(args("input"))
    // One output tuple per number; trimming and dropping empty tokens keeps
    // leading whitespace or blank lines from producing empty "numbers".
    .flatMap('line -> 'number) { line: String =>
      line.trim.split("\\s+").filter(_.nonEmpty).toList
    }
    .mapTo(('offset, 'line, 'number) -> ('row, 'col, 'val)) { res: (Long, String, String) =>
      val (offset, _, number) = res
      val row = offset + 1
      // Same row as the previous value: advance the column; otherwise restart at 1.
      pos = if (prev == row) pos + 1 else 1
      prev = row
      (row, pos, number)
    }
    // Filter on the field actually produced above ('val); the remaining
    // fields of each tuple pass through unchanged.
    .filter('val) { v: String =>
      v != zeroInt.toString && v != zeroDouble.toString
    }
    .write(Tsv(args("output")))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment