Created
September 5, 2013 15:11
-
-
Save tomtau/6451497 to your computer and use it in GitHub Desktop.
Quick and dirty script for generating benchmark jobs for the old and new Scalding Matrix APIs, as well as the corresponding TSV input files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tsvgen | |
import java.io._ | |
import scala.util.Random | |
/** Generates random sparse-matrix TSV files plus the source of two Scalding
  * jobs (one written against `Matrix`, one against `Matrix2`) that multiply
  * those matrices, and prints the `scald.rb` command line for each job.
  *
  * All output paths are derived from [[BASE]].
  */
object TsvGenerator {

  /** Writes a dense `rows x cols` matrix as TSV, one cell per line:
    * `row<TAB>col<TAB>value`, where `value` comes from `Random.nextDouble()`.
    *
    * @param matrixParam `(filename, rows, cols)` of the matrix to write
    */
  def generateTsv(matrixParam: (String, Long, Long)): Unit = {
    val (filename, rows, cols) = matrixParam
    val writer = new PrintWriter(new File(filename))
    // try/finally so the file handle is released even if writing fails.
    try {
      // Plain while loops: the counters are Long and no range object is built.
      var row = 0L
      while (row < rows) {
        var col = 0L
        while (col < cols) {
          writer.print(row)
          writer.print("\t")
          writer.print(col)
          writer.print("\t")
          writer.print(Random.nextDouble())
          writer.println()
          col += 1
        }
        row += 1
      }
    } finally writer.close()
  }

  /** Preamble of the generated job that uses the old `Matrix` API. */
  val OLD_HEADER: String = """
import com.twitter.scalding._
import com.twitter.scalding.mathematics.Matrix
class BenchOldJob(args : Args) extends Job(args) {
import Matrix._
"""

  /** Preamble of the generated job that uses the new `Matrix2` API. */
  val NEW_HEADER: String = """
import com.twitter.scalding._
import cascading.pipe.joiner._
import com.twitter.scalding.mathematics.Matrix2
import com.twitter.scalding.mathematics.FiniteHint
import com.twitter.algebird.Group
class BenchNewJob(args : Args) extends Job(args) {
import Matrix2._
import com.twitter.scalding.mathematics.MatrixLiteral
import cascading.pipe.Pipe
import cascading.tuple.Fields
import com.twitter.scalding.TDsl._
"""

  /** Name prefix of the matrix variables in the generated jobs (`a0`, `a1`, ...). */
  val MAT: String = "a"

  /** Path prefix shared by every generated file (TSV inputs, job sources, outputs). */
  val BASE: String = "/home/ttauber/scalding-bench/" + MAT

  /** Start of the printed command line; the job file suffix is appended to it. */
  val COMMAND: String = "/home/ttauber/scalding/scripts/scald.rb --local " + BASE

  /** Writes `content` to `path`, always closing the file afterwards. */
  private def writeTextFile(path: String, content: String): Unit = {
    val writer = new PrintWriter(new File(path))
    try writer.write(content)
    finally writer.close()
  }

  /** Renders `operands` as a parenthesised product, e.g. `(a0 * a1 * a2)`. */
  private def chain(operands: Seq[String]): String = {
    require(operands.nonEmpty, "a multiplication chain needs at least one matrix")
    operands.mkString("(", " * ", ")")
  }

  /** Prints the command that runs `script` with one `--input` per matrix index. */
  private def printCommand(script: String, inputIndices: Seq[Int]): Unit = {
    val inputs = inputIndices.map(i => " --input " + BASE + i + ".tsv").mkString
    println(COMMAND + script + inputs + " --output " + BASE + "o.tsv" + " > /dev/null 2>&1")
  }

  /** Turns a list of chain dimensions into matrix descriptors: consecutive
    * dimensions `(d(i), d(i+1))` become the rows/cols of matrix `i`, stored in
    * `BASE + i + ".tsv"`.
    *
    * Indices come from the position in the list, so equal dimension pairs
    * still get distinct file names. Fewer than two dimensions yield an empty list.
    *
    * @param dims the dimensions of the multiplication chain
    * @return one `(filename, rows, cols)` triple per matrix
    */
  def generatePairs(dims: List[Long]): List[(String, Long, Long)] =
    dims.zip(dims.drop(1)).zipWithIndex.map {
      case ((rows, cols), index) => (BASE + index + ".tsv", rows, cols)
    }

  /** Source snippet (old API) that reads the matrix file `matrixParam._1`
    * into a value named `MAT + index`.
    */
  def readOld(matrixParam: (String, Long, Long), index: Int): String =
    "\tval " + MAT + index +
      " = Tsv(\"" + matrixParam._1 + "\", ('x, 'y, 'v) )\n\t.read.\n\ttoMatrix[Int,Int,Double]('x,'y,'v)\n"

  /** Source snippet (old API) multiplying matrices `0 until len` and writing
    * the product to `BASE + outname + ".tsv"`; also closes the job class.
    *
    * @param len number of matrices in the chain; must be positive
    */
  def multOldChain(len: Int, outname: String): String =
    chain((0 until len).map(i => MAT + i)) + ".write( Tsv(\"" + BASE + outname + ".tsv\"))\n}"

  /** Writes `BASE + "old.scala"`, a job multiplying all of `mats` with the old
    * API, and prints the command line that runs it.
    *
    * @param mats non-empty list of `(filename, rows, cols)` descriptors
    */
  def generateOldJob(mats: List[(String, Long, Long)]): Unit = {
    // zipWithIndex instead of indexOf: correct for duplicate entries and linear.
    val readingMats = mats.zipWithIndex.map { case (mat, i) => readOld(mat, i) }.mkString("\n\n")
    writeTextFile(BASE + "old" + ".scala", OLD_HEADER + readingMats + multOldChain(mats.length, "old"))
    printCommand("old.scala", mats.indices)
  }

  /** Source snippet (new API) that reads the matrix file `matrixParam._1`
    * into a typed pipe and wraps it in a `MatrixLiteral` named `MAT + index`,
    * sized by a `FiniteHint(rows, cols)`.
    */
  def readNew(matrixParam: (String, Long, Long), index: Int): String =
    "\tval p" + index +
      " = Tsv(\"" + matrixParam._1 + "\", ('x, 'y, 'v) )\n\t.read\n\nval tp" + index + " = p" + index +
      ".toTypedPipe[(Int, Int, Double)](('x,'y,'v))\n" +
      "val " + MAT + index + " = MatrixLiteral(tp" + index + ", FiniteHint(" + matrixParam._2 + ", " +
      matrixParam._3 + "))\n\n"

  /** Source snippet (new API) multiplying matrices `0 until len` and writing
    * the product to `BASE + outname + ".tsv"`; also closes the job class.
    *
    * @param len number of matrices in the chain; must be positive
    */
  def multNewChain(len: Int, outname: String): String =
    chain((0 until len).map(i => MAT + i)) +
      ".toTypedPipe.write( TypedTsv[(Int, Int, Double)](\"" + BASE + outname + ".tsv\"))\n}"

  /** Writes `BASE + "new.scala"`, a job multiplying all of `mats` with the new
    * API, and prints the command line that runs it.
    *
    * @param mats non-empty list of `(filename, rows, cols)` descriptors
    */
  def generateNewJob(mats: List[(String, Long, Long)]): Unit = {
    val readingMats = mats.zipWithIndex.map { case (mat, i) => readNew(mat, i) }.mkString("\n\n")
    writeTextFile(BASE + "new" + ".scala", NEW_HEADER + readingMats + multNewChain(mats.length, "new"))
    printCommand("new.scala", mats.indices)
  }

  /** Source snippet (old API) multiplying matrix `MAT + "0"` by itself `len`
    * times in total and writing the result to `BASE + outname + ".tsv"`.
    *
    * @param len number of factors in the product; must be positive
    */
  def graphOldChain(len: Int, outname: String): String =
    chain(Seq.fill(len)(MAT + "0")) + ".write( Tsv(\"" + BASE + outname + ".tsv\"))\n}"

  /** Writes `BASE + "old.scala"`, a job raising `mat` to the `len`-th power by
    * repeated multiplication (old API), and prints the command that runs it.
    */
  def generateOldGraphJob(mat: (String, Long, Long), len: Int): Unit = {
    writeTextFile(BASE + "old" + ".scala", OLD_HEADER + readOld(mat, 0) + graphOldChain(len, "old"))
    printCommand("old.scala", Seq(0))
  }

  /** Source snippet (new API) raising matrix `MAT + "0"` to the power `len`
    * with the `^` operator and writing the result to `BASE + outname + ".tsv"`.
    *
    * The exponent follows `len` (it used to be fixed at 8 regardless of `len`).
    */
  def graphNewChain(len: Int, outname: String): String =
    "(" + MAT + "0 ^ " + len + ").toTypedPipe.write( TypedTsv[(Int, Int, Double)](\"" +
      BASE + outname + ".tsv\"))\n}"

  /** Writes `BASE + "new.scala"`, a job raising `mat` to the `len`-th power
    * (new API), and prints the command that runs it.
    */
  def generateNewGraphJob(mat: (String, Long, Long), len: Int): Unit = {
    writeTextFile(BASE + "new" + ".scala", NEW_HEADER + readNew(mat, 0) + graphNewChain(len, "new"))
    printCommand("new.scala", Seq(0))
  }

  /** Generates one 100x100 matrix and the old/new jobs computing its 8th power. */
  def main(args: Array[String]): Unit = {
    /* Alternative benchmark: a chain of differently sized matrices.
    val dimensions = generatePairs(List(35L,15L,5L,10L,200L,250L))
    dimensions.foreach(param => generateTsv(param))
    generateOldJob(dimensions)
    generateNewJob(dimensions)*/
    val mat = (BASE + "0.tsv", 100L, 100L)
    generateTsv(mat)
    generateOldGraphJob(mat, 8)
    generateNewGraphJob(mat, 8)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment