@blmarket
Last active April 30, 2023 06:15
Scala+HBase bootstrap project, including the sbt build and HelloWorld files.
// build.sbt (settings are blank-line separated, as 0.12-era sbt requires)
name := "HelloWorld"

version := "0.0.1"

scalaVersion := "2.9.2"

scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")

libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase" % "0.94.6-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-common" % "2.0.0-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-client" % "2.0.0-cdh4.3.0"
)

resolvers ++= Seq(
  "Hadoop Releases" at "https://repository.cloudera.com/content/repositories/releases/",
  "Cloudera" at "https://repository.cloudera.com/artifactory/public/"
)

unmanagedJars in Compile <++= baseDirectory map { base =>
  // Pick up every jar under the project root, plus the cluster's HBase/Hadoop
  // config directories, so the client sees the right site configuration.
  val baseJars = (base ** "*.jar")
  (baseJars +++ file("/etc/hbase/conf") +++ file("/etc/hadoop/conf")).classpath
}
// libraryDependencies += "com.nicta" %% "scoobi" % "0.7.0-cdh4-SNAPSHOT"
// resolvers ++= Seq("nicta's avro" at "http://nicta.github.com/scoobi/releases",
// "sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots",
// "cloudera" at "https://repository.cloudera.com/content/repositories/releases")
// run this with sbt run
import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util._
object HBaseNoob {
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val admin = new HBaseAdmin(conf)
    // admin.listTables().foreach(println) // listing tables works
    // Write a single cell: row "test-key", family "info", qualifier "q"
    val table = new HTable(conf, "TestTable")
    val put = new Put("test-key".getBytes())
    put.add("info".getBytes(), "q".getBytes(), "value".getBytes())
    table.put(put)
    println("HelloWorld")
  }
}
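To check that the Put above actually landed, the row can be read back with a Get. This read-back snippet is not part of the original gist, just a sketch against the same 0.94-era client API and the same TestTable/info:q cell used above:
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HTable, Get}
import org.apache.hadoop.hbase.util.Bytes

object HBaseReadBack {
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val table = new HTable(conf, "TestTable")
    // Fetch the row written by HBaseNoob and print the stored cell value
    val result = table.get(new Get(Bytes.toBytes("test-key")))
    println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("q"))))
    table.close()
  }
}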
// $ `. runner.sh` beforehand, then run with: hadoop jar {jarfile} HelloWorld
import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result}
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
import org.apache.hadoop.io.{Text,NullWritable,LongWritable}
class MyMapper extends TableMapper[Text, Text] {
  // `override` is required: Mapper.map has a default (identity) implementation
  override def map(row: ImmutableBytesWritable, value: Result, context: Context) {
    context.write(new Text("WORLD"), new Text("HELLO"))
  }
}
object HelloWorld {
  def printTables(conf: Configuration) {
    val admin = new HBaseAdmin(conf)
    admin.listTables().foreach(println)
  }

  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val job = Job.getInstance(conf, "testJob")
    job.setJarByClass(classOf[MyMapper])
    val scan = new Scan()
    // Don't pollute the block cache with a one-off full scan
    scan.setCacheBlocks(false)
    TableMapReduceUtil.initTableMapperJob(
      "item_reco_test".getBytes(),
      scan,
      classOf[MyMapper],
      classOf[Text], classOf[Text], // must match MyMapper's output types
      job)
    job.setOutputFormatClass(classOf[NullOutputFormat[Text, Text]])
    val b = job.waitForCompletion(true)
    println(b)
    println("HelloWorld")
  }
}
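Counters are a simple way to confirm the mapper actually ran, since they are aggregated into the job summary that waitForCompletion(true) prints, unlike task-level stdout. This counting variant is a sketch, not part of the original gist (the group and counter names are arbitrary), and reuses the imports above:
class CountingMapper extends TableMapper[Text, Text] {
  override def map(row: ImmutableBytesWritable, value: Result, context: Context) {
    // Visible in the final counter dump even when task logs are hard to find
    context.getCounter("noob", "rows.seen").increment(1)
    context.write(new Text("WORLD"), new Text("HELLO"))
  }
}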
// $ `. runner.sh` beforehand, then run with: hadoop jar {jarfile} NoobDriver
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.io.{Text, IntWritable}
import org.apache.hadoop.util._
import org.apache.hadoop.fs.Path
class NoobMapper extends Mapper[Object, Text, Text, IntWritable] {
  // `override` is required here too, or the framework silently runs the
  // default identity map instead
  override def map(key: Object, value: Text, context: Context) {
    // Emit each input line with a count of 1
    context.write(value, new IntWritable(1))
  }
}
object NoobDriver {
  def main(args: Array[String]) {
    val conf = new Configuration()
    val otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs()
    val job = Job.getInstance(conf, "word count")
    job.setJarByClass(classOf[NoobMapper])
    job.setMapperClass(classOf[NoobMapper])
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    FileInputFormat.addInputPath(job, new Path(otherArgs.head))
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.last))
    job.waitForCompletion(true)
  }
}
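As written, NoobMapper emits each whole line with a count of 1 and no reducer is set, so the job writes (line, 1) pairs rather than counting words. A sketch of the missing pieces, not part of the original gist: tokenize in the mapper and sum in a reducer, then wire them in with job.setMapperClass(classOf[WordMapper]) and job.setReducerClass(classOf[SumReducer]):
import java.util.StringTokenizer
import scala.collection.JavaConversions._

class WordMapper extends Mapper[Object, Text, Text, IntWritable] {
  private val one = new IntWritable(1)
  private val word = new Text()
  override def map(key: Object, value: Text, context: Context) {
    // Split each line on whitespace and emit (word, 1) per token
    val itr = new StringTokenizer(value.toString)
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken())
      context.write(word, one)
    }
  }
}

class SumReducer extends Reducer[Text, IntWritable, Text, IntWritable] {
  override def reduce(key: Text, values: java.lang.Iterable[IntWritable], context: Context) {
    // Sum the 1s emitted for each word
    var sum = 0
    for (v <- values) sum += v.get()
    context.write(key, new IntWritable(sum))
  }
}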
#!/bin/bash
# This runner was originally based on https://github.com/derrickcheng/ScalaOnHadoop
# Authored by derrick
# export HADOOP_LIBS="/home/aa/projects/hadoop/hadoop/hadoop-core-1.0.3.jar:/home/aa/projects/hadoop/hadoop/lib/commons-logging-1.1.1.jar" #MODIFY THIS WITH HADOOP LIBS
export SCALA_LIBS="/usr/share/java/scala/jline.jar:/usr/share/java/scala-library-2.9.2.jar:/usr/share/java/scalap-2.9.2.jar"
export HBASE_LIBS=`hbase classpath`
# export SCALA_LIBS="${SCALA_ROOT}/jline.jar:${SCALA_ROOT}/scala-compiler.jar:${SCALA_ROOT}/scala-library.jar:${SCALA_ROOT}/scalap.jar:${SCALA_ROOT}/scalacheck.jar:${SCALA_ROOT}/scala-dbc.jar:${SCALA_ROOT}/scala-partest.jar:${SCALA_ROOT}/scala-swing.jar"
# export ALL_LIBS="${SCALA_LIBS}:${MATIO_LIBS}:${HADOOP_LIBS}:${BIDMAT_LIBS}:${JCUDA_LIBS}:${JAVA_HOME}/lib/tools.jar"
#export HADOOP_USER_CLASSPATH_FIRST="true"
export HADOOP_CLASSPATH="${SCALA_LIBS}:${HBASE_LIBS}"
#export HADOOP_CLASSPATH=lib/scala-library.jar:lib/BIDMat.jar:lib/jline-2.9.2.jar
# -libjars expects a comma-separated list, so translate the colon-separated classpath
export LIB_JARS=`echo ${HADOOP_CLASSPATH} | sed s/:/,/g`
# if [ `uname` = "Darwin" ]; then
# hadoop fs -put ${BIDMAT_ROOT}/lib/osx64/ osx64
# export FILES="osx64/HDF5_Copyright.html,osx64/JCUDA5.0,osx64/JCUDA_Copyright.txt,osx64/libbidmatmkl.jnilib,osx64/libhdf4.settings,osx64/libhdf5.settings,osx64/libiomp5.dylib,osx64/libjhdf.jnilib,osx64/libjhdf5.jnilib"
# else
# hadoop fs -put ${BIDMAT_ROOT}/lib/linux64 linux64
# export FILES="linux64/HDF5_Copyright.html,linux64/JCUDA4.2,linux64/JCUDA5.0,linux64/JCUDA_Copyright.txt,linux64/libbidmatmkl.so,linux64/libhdf4.settings,linux64/libhdf5.settings,linux64/libiomp5.so,linux64/libjhdf5.so,linux64/libjhdf.so"
# fi
# hadoop jar runJar.jar -libjars ${LIB_JARS} -files ${FILES} "BIDMatExample" input output
@peacebytes

Hi,

I followed almost the same steps as the HelloWorld example.

I added two ways of output:
class MyMapper extends TableMapper[Text, Text] {
  def map(row: ImmutableBytesWritable, value: Result, context: Context) {
    context.write(new Text("WORLD"), new Text("HELLO"))
    println("i am here")
    System.out.println("i am here with java");
    // do nothing
  }
}

The job finished, but I didn't find any output from MyMapper.

Could you please show me how I can make sure that the MyMapper class was called from HelloWorld?

Hope to hear from you soon.
Thanks & Regards,
Micahel


Log Type: stderr

Log Length: 1070

SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/lib/zookeeper/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/appcache/application_1378797941378_0006/filecache/-5421579767982981155/job.jar/job.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/4183485615875643550/hadoop-3828057274528849787.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/-694838895948176220/hadoop-3992786908817592457.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/8668426439724515383/hadoop-3900082634689490049.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.

Log Type: stdout

Log Length: 514

java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:1961)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2038)
at java.util.concurrent.LinkedBlockingQueue.poll(LinkedBlockingQueue.java:424)
at org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator$1.run(DefaultSpeculator.java:189)
at java.lang.Thread.run(Thread.java:662)

Log Type: syslog

Log Length: 39444

Showing 4096 bytes of 39444 total.

Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl is stopped.
2013-09-10 15:10:34,254 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster$ContainerLauncherRouter is stopped.
2013-09-10 15:10:34,256 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: Setting job diagnostics to
2013-09-10 15:10:34,256 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: History url is localhost:19888/jobhistory/job/job_1378797941378_0006
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:RMCommunicator is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: Final Stats: PendingReds:0 ScheduledMaps:0 ScheduledReds:0 AssignedMaps:0 AssignedReds:0 CompletedMaps:1 CompletedReds:1 ContAlloc:2 ContRel:0 HostLocal:0 RackLocal:1
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster$ContainerAllocatorRouter is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.ipc.Server: Stopping server on 41883
2013-09-10 15:10:34,261 ERROR [DefaultSpeculator background processing] org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator: Background thread returning, interrupted : java.lang.InterruptedException
2013-09-10 15:10:34,261 INFO [IPC Server listener on 41883] org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 41883
2013-09-10 15:10:34,262 INFO [IPC Server Responder] org.apache.hadoop.ipc.Server: Stopping IPC Server Responder
2013-09-10 15:10:34,263 INFO [Thread-62] org.mortbay.log: Stopped SelectChannelConnector@0.0.0.0:0
2013-09-10 15:10:34,363 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:MRClientService is stopped.
2013-09-10 15:10:34,364 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:CommitterEventHandler is stopped.
2013-09-10 15:10:34,364 INFO [Thread-62] org.apache.hadoop.ipc.Server: Stopping server on 49660
2013-09-10 15:10:34,365 INFO [IPC Server listener on 49660] org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 49660
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:TaskHeartbeatHandler is stopped.
2013-09-10 15:10:34,365 INFO [IPC Server Responder] org.apache.hadoop.ipc.Server: Stopping IPC Server Responder
2013-09-10 15:10:34,365 INFO [TaskHeartbeatHandler PingChecker] org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler: TaskHeartbeatHandler thread interrupted
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapred.TaskAttemptListenerImpl is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:Dispatcher is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Exiting MR AppMaster..GoodBye!
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: MRAppMaster received a signal. Signaling RMCommunicator and JobHistoryEventHandler.
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: RMCommunicator notified that iSignalled is: true
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: RMCommunicator notified that shouldUnregistered is: true
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler: JobHistoryEventHandler notified that forceJobCompletion is true
