Skip to content

Instantly share code, notes, and snippets.

@tomtau
Created September 5, 2013 15:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomtau/6451497 to your computer and use it in GitHub Desktop.
Save tomtau/6451497 to your computer and use it in GitHub Desktop.
Quick and dirty script for generating benchmark jobs for the old and new Matrix API, as well as the corresponding TSV input files
package tsvgen
import java.io._
import scala.util.Random
/**
 * Quick-and-dirty generator for Scalding matrix-multiplication benchmarks.
 *
 * It produces three kinds of artifacts under the hard-coded [[BASE]] prefix:
 *  - TSV files holding dense matrices with random `Double` entries
 *    (one `row<TAB>col<TAB>value` triple per line),
 *  - Scala source files for a job using the old `Matrix` API and for a job
 *    using the new `Matrix2` API,
 *  - a `scald.rb --local` command line for each job, printed to standard
 *    output (the command is only printed, never executed).
 */
object TsvGenerator {

  /**
   * Writes a dense `rows` x `cols` matrix of random doubles as TSV.
   *
   * @param matrixParam `(filename, rows, cols)`; each cell is emitted as
   *                    `row<TAB>col<TAB>value`, rows outermost
   */
  def generateTsv(matrixParam: (String, Long, Long)): Unit = {
    val (filename, rows, cols) = matrixParam
    val writer = new PrintWriter(new File(filename))
    // try/finally so the file handle is released even if writing fails.
    try {
      // Plain while loops: the bounds are Longs and may exceed what a Range can hold.
      var row = 0L
      while (row < rows) {
        var col = 0L
        while (col < cols) {
          writer.print(row)
          writer.print("\t")
          writer.print(col)
          writer.print("\t")
          writer.print(Random.nextDouble())
          writer.println()
          col += 1
        }
        row += 1
      }
    } finally {
      writer.close()
    }
  }

  /** Preamble of the generated old-API job; the class body is left open. */
  val OLD_HEADER: String =
    """
      |import com.twitter.scalding._
      |import com.twitter.scalding.mathematics.Matrix
      |class BenchOldJob(args : Args) extends Job(args) {
      |import Matrix._
      |""".stripMargin

  /** Preamble of the generated new-API job; the class body is left open. */
  val NEW_HEADER: String =
    """
      |import com.twitter.scalding._
      |import cascading.pipe.joiner._
      |import com.twitter.scalding.mathematics.Matrix2
      |import com.twitter.scalding.mathematics.FiniteHint
      |import com.twitter.algebird.Group
      |class BenchNewJob(args : Args) extends Job(args) {
      |import Matrix2._
      |import com.twitter.scalding.mathematics.MatrixLiteral
      |import cascading.pipe.Pipe
      |import cascading.tuple.Fields
      |import com.twitter.scalding.TDsl._
      |""".stripMargin

  /** Name prefix of every matrix variable in the generated jobs. */
  val MAT: String = "a"

  /** Path prefix shared by all generated files (TSV inputs, job sources, outputs). */
  val BASE: String = "/home/ttauber/scalding-bench/" + MAT

  /** Start of the printed launch command; the job file suffix is appended to it. */
  val COMMAND: String = "/home/ttauber/scalding/scripts/scald.rb --local " + BASE

  /**
   * Turns a chain of dimensions into matrix descriptions: consecutive entries
   * `d(i), d(i+1)` become the i-th matrix with `d(i)` rows and `d(i+1)` columns.
   *
   * @param dims chain dimensions; fewer than two entries yield an empty list
   * @return `(filename, rows, cols)` per matrix, the file name being
   *         `BASE + position + ".tsv"`
   */
  def generatePairs(dims: List[Long]): List[(String, Long, Long)] =
    // zipWithIndex (rather than indexOf) keeps file names distinct when the same
    // dimension pair occurs more than once; drop(1) tolerates an empty list.
    dims.zip(dims.drop(1)).zipWithIndex.map {
      case ((rows, cols), index) => (BASE + index + ".tsv", rows, cols)
    }

  /**
   * Source snippet that reads one TSV file into an old-API matrix.
   *
   * @param matrixParam `(filename, rows, cols)`; only the file name is used
   * @param index       suffix of the generated variable name (`MAT + index`)
   */
  def readOld(matrixParam: (String, Long, Long), index: Int): String =
    "\tval " + MAT + index +
      " = Tsv(\"" + matrixParam._1 + "\", ('x, 'y, 'v) )\n\t.read.\n\ttoMatrix[Int,Int,Double]('x,'y,'v)\n"

  /**
   * Final statement of an old-API job: multiplies `a0 * ... * a(len-1)`,
   * writes the product to `BASE + outname + ".tsv"` and closes the job class.
   *
   * @param len number of matrices in the chain; must be positive
   */
  def multOldChain(len: Int, outname: String): String = {
    require(len > 0, "a multiplication chain needs at least one matrix")
    product(matrixNames(len)) + oldSink(outname)
  }

  /**
   * Writes the old-API job multiplying all of `mats` to `BASE + "old.scala"`
   * and prints the command that runs it.
   *
   * @param mats non-empty list of `(filename, rows, cols)` descriptions
   */
  def generateOldJob(mats: List[(String, Long, Long)]): Unit = {
    require(mats.nonEmpty, "cannot generate a job without matrices")
    val readingMats =
      mats.zipWithIndex.map { case (mat, index) => readOld(mat, index) }.mkString("\n\n")
    emitJob("old", OLD_HEADER + readingMats + multOldChain(mats.length, "old"), mats.indices)
  }

  /**
   * Source snippet that reads one TSV file into a new-API `MatrixLiteral`,
   * passing the matrix dimensions along as a `FiniteHint`.
   *
   * @param matrixParam `(filename, rows, cols)`
   * @param index       suffix of the generated variable names
   */
  def readNew(matrixParam: (String, Long, Long), index: Int): String =
    "\tval p" + index +
      " = Tsv(\"" + matrixParam._1 + "\", ('x, 'y, 'v) )\n\t.read\n\nval tp" + index + " = p" + index +
      ".toTypedPipe[(Int, Int, Double)](('x,'y,'v))\n" +
      "val " + MAT + index + " = MatrixLiteral(tp" + index + ", FiniteHint(" + matrixParam._2 + ", " +
      matrixParam._3 + "))\n\n"

  /**
   * Final statement of a new-API job: multiplies `a0 * ... * a(len-1)`,
   * writes the product to `BASE + outname + ".tsv"` and closes the job class.
   *
   * @param len number of matrices in the chain; must be positive
   */
  def multNewChain(len: Int, outname: String): String = {
    require(len > 0, "a multiplication chain needs at least one matrix")
    product(matrixNames(len)) + newSink(outname)
  }

  /**
   * Writes the new-API job multiplying all of `mats` to `BASE + "new.scala"`
   * and prints the command that runs it.
   *
   * @param mats non-empty list of `(filename, rows, cols)` descriptions
   */
  def generateNewJob(mats: List[(String, Long, Long)]): Unit = {
    require(mats.nonEmpty, "cannot generate a job without matrices")
    val readingMats =
      mats.zipWithIndex.map { case (mat, index) => readNew(mat, index) }.mkString("\n\n")
    emitJob("new", NEW_HEADER + readingMats + multNewChain(mats.length, "new"), mats.indices)
  }

  /**
   * Final statement of an old-API job that multiplies matrix `a0` with itself,
   * `len` factors in total.
   *
   * @param len number of factors; must be positive
   */
  def graphOldChain(len: Int, outname: String): String = {
    require(len > 0, "a multiplication chain needs at least one matrix")
    product(Seq.fill(len)(MAT + "0")) + oldSink(outname)
  }

  /**
   * Writes the old-API job raising `mat` to the `len`-th power by repeated
   * multiplication to `BASE + "old.scala"` and prints the command that runs it.
   */
  def generateOldGraphJob(mat: (String, Long, Long), len: Int): Unit =
    emitJob("old", OLD_HEADER + readOld(mat, 0) + graphOldChain(len, "old"), Seq(0))

  /**
   * Final statement of a new-API job that raises matrix `a0` to the `len`-th
   * power with the `^` operator.
   *
   * @param len exponent (previously ignored in favour of a hard-coded 8)
   */
  def graphNewChain(len: Int, outname: String): String =
    "(" + MAT + "0 ^ " + len + ")" + newSink(outname)

  /**
   * Writes the new-API job raising `mat` to the `len`-th power to
   * `BASE + "new.scala"` and prints the command that runs it.
   */
  def generateNewGraphJob(mat: (String, Long, Long), len: Int): Unit =
    emitJob("new", NEW_HEADER + readNew(mat, 0) + graphNewChain(len, "new"), Seq(0))

  /** Generates one 100 x 100 matrix and both jobs computing its 8th power. */
  def main(args: Array[String]): Unit = {
    /*
    val dimensions = generatePairs(List(35L,15L,5L,10L,200L,250L))
    dimensions.foreach(param => generateTsv(param))
    generateOldJob(dimensions)
    generateNewJob(dimensions)*/
    val mat = (BASE + "0.tsv", 100L, 100L)
    generateTsv(mat)
    generateOldGraphJob(mat, 8)
    generateNewGraphJob(mat, 8)
  }

  /** Variable names `a0 .. a(len-1)` of the matrices in a chain. */
  private def matrixNames(len: Int): Seq[String] = (0 until len).map(i => MAT + i)

  /** Parenthesised product expression over the given operand names. */
  private def product(operands: Seq[String]): String = "(" + operands.mkString(" * ") + ")"

  /** Old-API write call targeting `BASE + outname + ".tsv"`, plus the closing brace of the job class. */
  private def oldSink(outname: String): String =
    ".write( Tsv(\"" + BASE + outname + ".tsv\"))\n}"

  /** New-API write call targeting `BASE + outname + ".tsv"`, plus the closing brace of the job class. */
  private def newSink(outname: String): String =
    ".toTypedPipe.write( TypedTsv[(Int, Int, Double)](\"" + BASE + outname + ".tsv\"))\n}"

  /** Writes `contents` to `path`, closing the writer even when the write fails. */
  private def writeFile(path: String, contents: String): Unit = {
    val writer = new PrintWriter(new File(path))
    try writer.write(contents)
    finally writer.close()
  }

  /**
   * Stores a generated job as `BASE + suffix + ".scala"` and prints the command
   * that runs it, with one `--input` argument per entry of `inputIndices`.
   */
  private def emitJob(suffix: String, source: String, inputIndices: Seq[Int]): Unit = {
    writeFile(BASE + suffix + ".scala", source)
    val inputs = inputIndices.map(i => " --input " + BASE + i + ".tsv").mkString
    val exec = COMMAND + suffix + ".scala" + inputs + " --output " + BASE + "o.tsv"
    System.out.println(exec + " > /dev/null 2>&1")
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment