blmarket/HBaseNoob.scala

## build.sbt
name := "HelloWorld"

version := "0.0.1"

scalaVersion := "2.9.2"

scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")

libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase" % "0.94.6-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-common" % "2.0.0-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-client" % "2.0.0-cdh4.3.0"
)

resolvers ++= Seq(
  "Hadoop Releases" at "https://repository.cloudera.com/content/repositories/releases/",
  "Cloudera" at "https://repository.cloudera.com/artifactory/public/"
)

unmanagedJars in Compile <++= baseDirectory map { base =>
  val baseJars = (base ** "*.jar")
  (baseJars +++ file("/etc/hbase/conf") +++ file("/etc/hadoop/conf")).classpath
}

// libraryDependencies += "com.nicta" %% "scoobi" % "0.7.0-cdh4-SNAPSHOT"
// resolvers ++= Seq("nicta's avro" at "http://nicta.github.com/scoobi/releases",
//                   "sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots",
//                   "cloudera" at "https://repository.cloudera.com/content/repositories/releases")

## HBaseNoob.scala
// run this with sbt run

import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util._
import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
import org.apache.hadoop.io.{Text,NullWritable}

object HBaseNoob {
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val admin = new HBaseAdmin(conf)

    // admin.listTables().foreach(println) // listing table... works
    val table = new HTable(conf, "TestTable")
    val put = new Put("test-key".getBytes())
    put.add("info".getBytes(), "q".getBytes(), "value".getBytes())
    table.put(put)

    println("HelloWorld")
  }
}


## HelloWorld.scala
// $ . runner.sh beforehand, run by hadoop jar {jarfile} HelloWorld

import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
import org.apache.hadoop.io.{Text,NullWritable,LongWritable}

class MyMapper extends TableMapper[Text, Text] {
  def map(row: ImmutableBytesWritable, value: Result, context: Context) {
    context.write(new Text("WORLD"), new Text("HELLO"))
    // do nothing
  }
}

object HelloWorld {
  def printTables(conf: Configuration) {
    val admin = new HBaseAdmin(HBaseConfiguration.create())
    val tables = admin.listTables()
    tables.foreach(println)
  }

  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val job = Job.getInstance(conf, "testJob")
    job.setJarByClass(classOf[MyMapper])

    val scan = new Scan()
    scan.setCacheBlocks(false)

    TableMapReduceUtil.initTableMapperJob(
      "item_reco_test".getBytes(),
      scan,
      classOf[MyMapper],
      classOf[ImmutableBytesWritable], classOf[Result],
      job)
    job.setOutputFormatClass(classOf[NullOutputFormat[LongWritable, LongWritable]])
    val b = job.waitForCompletion(true)
    println(b)
    println("HelloWorld")
  }
}


## NoobDriver.scala
// $ . runner.sh beforehand, run by hadoop jar {jarfile} HelloWorld

import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.io.{Text,NullWritable,LongWritable,IntWritable}
import org.apache.hadoop.util._
import org.apache.hadoop.fs.Path

class NoobMapper extends Mapper[Object, Text, Text, IntWritable] {
  def map(key: Object, value: Text, context: Context) {
    context.write(value, new IntWritable(1))
  }
}

object NoobDriver {
  def main(args: Array[String]) {
    val conf = new Configuration()
    val otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs()

    val job = new Job(conf, "word count")
    job.setJarByClass(classOf[NoobMapper])
    job.setMapperClass(classOf[NoobMapper])

    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])

    FileInputFormat.addInputPath(job, new Path(otherArgs.head))
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.last))

    job.waitForCompletion(true)
  }
}

## runner.sh
#!/bin/bash

# This runner was origin by https://github.com/derrickcheng/ScalaOnHadoop
# Authored by derrick

# export HADOOP_LIBS="/home/aa/projects/hadoop/hadoop/hadoop-core-1.0.3.jar:/home/aa/projects/hadoop/hadoop/lib/commons-logging-1.1.1.jar" #MODIFY THIS WITH HADOOP LIBS
export SCALA_LIBS="/usr/share/java/scala/jline.jar:/usr/share/java/scala-library-2.9.2.jar:/usr/share/java/scalap-2.9.2.jar"
export HBASE_LIBS=`hbase classpath`
# export SCALA_LIBS="${SCALA_ROOT}/jline.jar:${SCALA_ROOT}/scala-compiler.jar:${SCALA_ROOT}/scala-library.jar:${SCALA_ROOT}/scalap.jar:${SCALA_ROOT}/scalacheck.jar:${SCALA_ROOT}/scala-dbc.jar:${SCALA_ROOT}/scala-partest.jar:${SCALA_ROOT}/scala-swing.jar"

# export ALL_LIBS="${SCALA_LIBS}:${MATIO_LIBS}:${HADOOP_LIBS}:${BIDMAT_LIBS}:${JCUDA_LIBS}:${JAVA_HOME}/lib/tools.jar"

#export HADOOP_USER_CLASSPATH_FIRST="true"
export HADOOP_CLASSPATH="${SCALA_LIBS}:${HBASE_LIBS}"
#export HADOOP_CLASSPATH=lib/scala-library.jar:lib/BIDMat.jar:lib/jline-2.9.2.jar
export LIB_JARS=`echo ${HADOOP_CLASSPATH} | sed s/:/,/g`

#  if [ `uname` = "Darwin" ]; then
#          hadoop fs -put ${BIDMAT_ROOT}/lib/osx64/ osx64
#          export FILES="osx64/HDF5_Copyright.html,osx64/JCUDA5.0,osx64/JCUDA_Copyright.txt,osx64/libbidmatmkl.jnilib,osx64/libhdf4.settings,osx64/libhdf5.settings,osx64/libiomp5.dylib,osx64/libjhdf.jnilib,osx64/libjhdf5.jnilib"
#  else
#          hadoop fs -put ${BIDMAT_ROOT}/lib/linux64 linux64
#          export FILES="linux64/HDF5_Copyright.html,linux64/JCUDA4.2,linux64/JCUDA5.0,linux64/JCUDA_Copyright.txt,linux64/libbidmatmkl.so,linux64/libhdf4.settings,linux64/libhdf5.settings,linux64/libiomp5.so,linux64/libjhdf5.so,linux64/libjhdf.so"
#  fi

# hadoop jar runJar.jar -libjars ${LIB_JARS} -files ${FILES} "BIDMatExample" input output
	name := "HelloWorld"

	version := "0.0.1"

	scalaVersion := "2.9.2"

	scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")

	libraryDependencies ++= Seq(
	"org.apache.hbase" % "hbase" % "0.94.6-cdh4.3.0",
	"org.apache.hadoop" % "hadoop-common" % "2.0.0-cdh4.3.0",
	"org.apache.hadoop" % "hadoop-client" % "2.0.0-cdh4.3.0"
	)

	resolvers ++= Seq(
	"Hadoop Releases" at "https://repository.cloudera.com/content/repositories/releases/",
	"Cloudera" at "https://repository.cloudera.com/artifactory/public/"
	)

	unmanagedJars in Compile <++= baseDirectory map { base =>
	val baseJars = (base ** "*.jar")
	(baseJars +++ file("/etc/hbase/conf") +++ file("/etc/hadoop/conf")).classpath
	}

	// libraryDependencies += "com.nicta" %% "scoobi" % "0.7.0-cdh4-SNAPSHOT"
	// resolvers ++= Seq("nicta's avro" at "http://nicta.github.com/scoobi/releases",
	// "sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots",
	// "cloudera" at "https://repository.cloudera.com/content/repositories/releases")
	// run this with sbt run

	import org.apache.hadoop.hbase._
	import org.apache.hadoop.hbase.client._
	import org.apache.hadoop.hbase.util._
	import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
	import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
	import org.apache.hadoop.hbase.HBaseConfiguration
	import org.apache.hadoop.hbase.io.ImmutableBytesWritable

	import org.apache.hadoop.conf.Configuration
	import org.apache.hadoop.mapreduce.Job
	import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
	import org.apache.hadoop.io.{Text,NullWritable}

	object HBaseNoob {
	def main(args: Array[String]) {
	val conf = HBaseConfiguration.create()
	val admin = new HBaseAdmin(conf)

	// admin.listTables().foreach(println) // listing table... works
	val table = new HTable(conf, "TestTable")
	val put = new Put("test-key".getBytes())
	put.add("info".getBytes(), "q".getBytes(), "value".getBytes())
	table.put(put)

	println("HelloWorld")
	}
	}
	// $ . runner.sh beforehand, run by hadoop jar {jarfile} HelloWorld

	import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
	import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
	import org.apache.hadoop.hbase.HBaseConfiguration
	import org.apache.hadoop.hbase.io.ImmutableBytesWritable

	import org.apache.hadoop.conf.Configuration
	import org.apache.hadoop.mapreduce.Job
	import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
	import org.apache.hadoop.io.{Text,NullWritable,LongWritable}

	class MyMapper extends TableMapper[Text, Text] {
	def map(row: ImmutableBytesWritable, value: Result, context: Context) {
	context.write(new Text("WORLD"), new Text("HELLO"))
	// do nothing
	}
	}

	object HelloWorld {
	def printTables(conf: Configuration) {
	val admin = new HBaseAdmin(HBaseConfiguration.create())
	val tables = admin.listTables()
	tables.foreach(println)
	}

	def main(args: Array[String]) {
	val conf = HBaseConfiguration.create()
	val job = Job.getInstance(conf, "testJob")
	job.setJarByClass(classOf[MyMapper])

	val scan = new Scan()
	scan.setCacheBlocks(false)

	TableMapReduceUtil.initTableMapperJob(
	"item_reco_test".getBytes(),
	scan,
	classOf[MyMapper],
	classOf[ImmutableBytesWritable], classOf[Result],
	job)
	job.setOutputFormatClass(classOf[NullOutputFormat[LongWritable, LongWritable]])
	val b = job.waitForCompletion(true)
	println(b)
	println("HelloWorld")
	}
	}
	#!/bin/bash

	# This runner was origin by https://github.com/derrickcheng/ScalaOnHadoop
	# Authored by derrick

	# export HADOOP_LIBS="/home/aa/projects/hadoop/hadoop/hadoop-core-1.0.3.jar:/home/aa/projects/hadoop/hadoop/lib/commons-logging-1.1.1.jar" #MODIFY THIS WITH HADOOP LIBS
	export SCALA_LIBS="/usr/share/java/scala/jline.jar:/usr/share/java/scala-library-2.9.2.jar:/usr/share/java/scalap-2.9.2.jar"
	export HBASE_LIBS=`hbase classpath`
	# export SCALA_LIBS="${SCALA_ROOT}/jline.jar:${SCALA_ROOT}/scala-compiler.jar:${SCALA_ROOT}/scala-library.jar:${SCALA_ROOT}/scalap.jar:${SCALA_ROOT}/scalacheck.jar:${SCALA_ROOT}/scala-dbc.jar:${SCALA_ROOT}/scala-partest.jar:${SCALA_ROOT}/scala-swing.jar"

	# export ALL_LIBS="${SCALA_LIBS}:${MATIO_LIBS}:${HADOOP_LIBS}:${BIDMAT_LIBS}:${JCUDA_LIBS}:${JAVA_HOME}/lib/tools.jar"

	#export HADOOP_USER_CLASSPATH_FIRST="true"
	export HADOOP_CLASSPATH="${SCALA_LIBS}:${HBASE_LIBS}"
	#export HADOOP_CLASSPATH=lib/scala-library.jar:lib/BIDMat.jar:lib/jline-2.9.2.jar
	export LIB_JARS=`echo ${HADOOP_CLASSPATH} \| sed s/:/,/g`

	# if [ `uname` = "Darwin" ]; then
	# hadoop fs -put ${BIDMAT_ROOT}/lib/osx64/ osx64
	# export FILES="osx64/HDF5_Copyright.html,osx64/JCUDA5.0,osx64/JCUDA_Copyright.txt,osx64/libbidmatmkl.jnilib,osx64/libhdf4.settings,osx64/libhdf5.settings,osx64/libiomp5.dylib,osx64/libjhdf.jnilib,osx64/libjhdf5.jnilib"
	# else
	# hadoop fs -put ${BIDMAT_ROOT}/lib/linux64 linux64
	# export FILES="linux64/HDF5_Copyright.html,linux64/JCUDA4.2,linux64/JCUDA5.0,linux64/JCUDA_Copyright.txt,linux64/libbidmatmkl.so,linux64/libhdf4.settings,linux64/libhdf5.settings,linux64/libiomp5.so,linux64/libjhdf5.so,linux64/libjhdf.so"
	# fi

	# hadoop jar runJar.jar -libjars ${LIB_JARS} -files ${FILES} "BIDMatExample" input output