Skip to content

Instantly share code, notes, and snippets.

@alexy
Created January 12, 2012 22:09
Show Gist options
  • Save alexy/1603447 to your computer and use it in GitHub Desktop.
Save alexy/1603447 to your computer and use it in GitHub Desktop.
Initialize a Hadoop job from Scala
package com.klout.labs.braver.util
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.conf.Configuration
/** Helpers for bootstrapping Hadoop MapReduce jobs from Scala. */
object Hadoop {

  /**
   * Configuration entries written into every job built by `setJobConfig`.
   *
   * NOTE(review): "mapred.output.compress" is "false" here, so the two
   * "mapred.output.compression.*" entries presumably only matter if a caller
   * later switches final-output compression on — confirm against the Hadoop
   * version in use.
   */
  private val JobSettings: List[(String, String)] = List(
    "mapred.output.compress"              -> "false",
    "mapred.compress.map.output"          -> "true",
    "mapred.map.output.compression.codec" -> "com.hadoop.compression.lzo.LzoCodec",
    "mapred.output.compression.codec"     -> "com.hadoop.compression.lzo.LzoCodec",
    "mapred.output.compression.type"      -> "BLOCK"
  )

  /**
   * Creates a new Hadoop job and applies the standard settings to it.
   *
   * The job is built on a fresh `Configuration`, receives every entry of
   * `JobSettings`, has its jar located via `jobClass`, and optionally has its
   * output key/value classes set.
   *
   * The configured job is returned so that the caller can keep configuring or
   * submit it; previously the job was created and then silently discarded.
   * Call sites that ignore the result keep compiling unchanged.
   *
   * @param name          human-readable job name passed to the `Job` constructor
   * @param jobClass      class handed to `Job.setJarByClass`
   * @param outputClasses optional `(outputKeyClass, outputValueClass)` pair;
   *                      when `None` the job's output classes are left untouched
   * @return the newly created and configured job
   */
  def setJobConfig(
      name: String,
      jobClass: Class[_],
      outputClasses: Option[(Class[_], Class[_])]): Job = {
    val job = new Job(new Configuration(), name)

    // Disabled experiments kept from the original source:
    // configuration.set("today", "20120103")
    // configuration.set("fs.default.name", "hdfs://dewhnn:8020/")
    // val fs: FileSystem = DistributedFileSystem.newInstance(configuration)

    // Settings are written through the job's own configuration object rather
    // than the Configuration given to the constructor. Per the Hadoop Job API
    // docs the job works on a copy of the Configuration it is constructed
    // with, so later changes to that original instance would not be seen by
    // the job (this answers the old "will we see the changes propagate?" TODO
    // — verify for the Hadoop version in use).
    val jobConf = job.getConfiguration
    JobSettings.foreach { case (key, value) => jobConf.set(key, value) }

    job.setJarByClass(jobClass)

    outputClasses.foreach { case (outputKeyClass, outputValueClass) =>
      job.setOutputKeyClass(outputKeyClass)
      job.setOutputValueClass(outputValueClass)
    }

    job
  }
}
// usage:
def main(args: Array[String]) = withHadoopArgs(args) { a =>
setJobConfig("Scoobi Score Dump", this.getClass, Some((classOf[Text], classOf[Text])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment