Last active
April 30, 2023 06:15
-
-
Save blmarket/6248323 to your computer and use it in GitHub Desktop.
Scala+HBase bootstrap project includes sbt and HelloWorld file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sbt build definition for the Scala + HBase bootstrap project
// (Scala 2.9.x, CDH4-era HBase/Hadoop, sbt 0.12-style operators).
name := "HelloWorld"

version := "0.0.1"

scalaVersion := "2.9.2"

scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")

libraryDependencies ++= Seq(
  "org.apache.hbase" % "hbase" % "0.94.6-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-common" % "2.0.0-cdh4.3.0",
  "org.apache.hadoop" % "hadoop-client" % "2.0.0-cdh4.3.0"
)

// Cloudera repositories host the CDH-suffixed artifacts above.
resolvers ++= Seq(
  "Hadoop Releases" at "https://repository.cloudera.com/content/repositories/releases/",
  "Cloudera" at "https://repository.cloudera.com/artifactory/public/"
)

// Put every jar under the project root, plus the local HBase/Hadoop
// config directories, on the compile classpath.
unmanagedJars in Compile <++= baseDirectory map { projectRoot =>
  val localJars = projectRoot ** "*.jar"
  (localJars +++ file("/etc/hbase/conf") +++ file("/etc/hadoop/conf")).classpath
}

// libraryDependencies += "com.nicta" %% "scoobi" % "0.7.0-cdh4-SNAPSHOT"
// resolvers ++= Seq("nicta's avro" at "http://nicta.github.com/scoobi/releases",
//   "sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots",
//   "cloudera" at "https://repository.cloudera.com/content/repositories/releases")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// run this with sbt run | |
import org.apache.hadoop.hbase._ | |
import org.apache.hadoop.hbase.client._ | |
import org.apache.hadoop.hbase.util._ | |
import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result} | |
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil} | |
import org.apache.hadoop.hbase.HBaseConfiguration | |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable | |
import org.apache.hadoop.conf.Configuration | |
import org.apache.hadoop.mapreduce.Job | |
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat | |
import org.apache.hadoop.io.{Text,NullWritable} | |
object HBaseNoob {
  /** Connects to HBase and writes a single test cell to "TestTable".
    *
    * Fix over the original: the HBaseAdmin and HTable were created but
    * never closed, leaking ZooKeeper/RPC connections; both are now
    * released in finally blocks. `Bytes.toBytes` (from hbase.util)
    * replaces `String.getBytes()`, which depends on the platform's
    * default charset.
    */
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val admin = new HBaseAdmin(conf)
    try {
      // admin.listTables().foreach(println) // listing tables... works
      val table = new HTable(conf, "TestTable")
      try {
        val put = new Put(Bytes.toBytes("test-key"))
        put.add(Bytes.toBytes("info"), Bytes.toBytes("q"), Bytes.toBytes("value"))
        table.put(put)
      } finally {
        table.close()
      }
      println("HelloWorld")
    } finally {
      admin.close()
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// $ . runner.sh beforehand, run by hadoop jar {jarfile} HelloWorld | |
import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result} | |
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil} | |
import org.apache.hadoop.hbase.HBaseConfiguration | |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable | |
import org.apache.hadoop.conf.Configuration | |
import org.apache.hadoop.mapreduce.Job | |
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat | |
import org.apache.hadoop.io.{Text,NullWritable,LongWritable} | |
/** Table mapper that emits a constant ("WORLD", "HELLO") pair for every
  * scanned row.
  *
  * Fix: `override` is required here. Without it, this `map` is compiled
  * as a mere overload and does NOT replace `Mapper.map`, so Hadoop runs
  * the default identity mapper and this body is never invoked — which
  * is exactly why the job "finishes" while the println output never
  * appears (see the question in the gist comments).
  */
class MyMapper extends TableMapper[Text, Text] {
  override def map(row: ImmutableBytesWritable, value: Result, context: Context) {
    context.write(new Text("WORLD"), new Text("HELLO"))
  }
}
object HelloWorld {
  /** Prints every table reachable through the given configuration.
    *
    * Fix: the original ignored its `conf` parameter and built a fresh
    * HBaseConfiguration instead; it also leaked the admin connection.
    */
  def printTables(conf: Configuration) {
    val admin = new HBaseAdmin(conf)
    try {
      admin.listTables().foreach(println)
    } finally {
      admin.close()
    }
  }

  /** Configures and runs a map-only scan job over "item_reco_test". */
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val job = Job.getInstance(conf, "testJob")
    job.setJarByClass(classOf[MyMapper])

    val scan = new Scan()
    scan.setCacheBlocks(false) // full scan: don't pollute the block cache

    // Fix: the 4th/5th arguments are the mapper's OUTPUT key/value
    // classes. MyMapper is TableMapper[Text, Text], so they must be
    // Text/Text — the original passed ImmutableBytesWritable/Result,
    // which would raise "Type mismatch in key from map" once the
    // mapper actually emits records.
    TableMapReduceUtil.initTableMapperJob(
      "item_reco_test".getBytes(),
      scan,
      classOf[MyMapper],
      classOf[Text], classOf[Text],
      job)
    job.setOutputFormatClass(classOf[NullOutputFormat[LongWritable, LongWritable]])

    val succeeded = job.waitForCompletion(true)
    println(succeeded)
    println("HelloWorld")
    // Propagate job failure to the shell instead of always exiting 0.
    if (!succeeded) sys.exit(1)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// $ . runner.sh beforehand, run by hadoop jar {jarfile} HelloWorld | |
import org.apache.hadoop.hbase.client.{HBaseAdmin,Scan,Result} | |
import org.apache.hadoop.hbase.mapreduce.{TableMapper,TableMapReduceUtil} | |
import org.apache.hadoop.hbase.HBaseConfiguration | |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable | |
import org.apache.hadoop.conf.Configuration | |
import org.apache.hadoop.mapreduce._ | |
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat | |
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat | |
import org.apache.hadoop.io.{Text,NullWritable,LongWritable,IntWritable} | |
import org.apache.hadoop.util._ | |
import org.apache.hadoop.fs.Path | |
/** Word-count-style mapper: emits (record, 1) for every input record.
  *
  * Fix: `override` is required so this method actually replaces
  * `Mapper.map`; without it the default identity mapper runs and this
  * body is never called.
  */
class NoobMapper extends Mapper[Object, Text, Text, IntWritable] {
  override def map(key: Object, value: Text, context: Context) {
    context.write(value, new IntWritable(1))
  }
}
object NoobDriver {
  /** Driver for the map-only count job.
    *
    * Usage: NoobDriver &lt;input path&gt; &lt;output path&gt; (after generic
    * Hadoop options are stripped).
    *
    * Fixes over the original: validates the argument count before
    * calling `head`/`last` on a possibly-empty array, uses the
    * non-deprecated `Job.getInstance` (consistent with the other
    * driver in this gist), and propagates job success/failure as the
    * process exit code.
    */
  def main(args: Array[String]) {
    val conf = new Configuration()
    val otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs()
    if (otherArgs.length < 2) {
      System.err.println("Usage: NoobDriver <input path> <output path>")
      sys.exit(2)
    }
    val job = Job.getInstance(conf, "word count")
    job.setJarByClass(classOf[NoobMapper])
    job.setMapperClass(classOf[NoobMapper])
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    FileInputFormat.addInputPath(job, new Path(otherArgs.head))
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.last))
    sys.exit(if (job.waitForCompletion(true)) 0 else 1)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Classpath setup for running the Scala+HBase jobs via `hadoop jar`.
# Source this file (". runner.sh") before launching a job.
# This runner was originally adapted from https://github.com/derrickcheng/ScalaOnHadoop
# Authored by derrick
# export HADOOP_LIBS="/home/aa/projects/hadoop/hadoop/hadoop-core-1.0.3.jar:/home/aa/projects/hadoop/hadoop/lib/commons-logging-1.1.1.jar" #MODIFY THIS WITH HADOOP LIBS
export SCALA_LIBS="/usr/share/java/scala/jline.jar:/usr/share/java/scala-library-2.9.2.jar:/usr/share/java/scalap-2.9.2.jar"
# `hbase classpath` prints everything HBase needs on the classpath.
# Quoted $(...) instead of unquoted backticks so paths survive intact.
export HBASE_LIBS="$(hbase classpath)"
# export SCALA_LIBS="${SCALA_ROOT}/jline.jar:${SCALA_ROOT}/scala-compiler.jar:${SCALA_ROOT}/scala-library.jar:${SCALA_ROOT}/scalap.jar:${SCALA_ROOT}/scalacheck.jar:${SCALA_ROOT}/scala-dbc.jar:${SCALA_ROOT}/scala-partest.jar:${SCALA_ROOT}/scala-swing.jar"
# export ALL_LIBS="${SCALA_LIBS}:${MATIO_LIBS}:${HADOOP_LIBS}:${BIDMAT_LIBS}:${JCUDA_LIBS}:${JAVA_HOME}/lib/tools.jar"
#export HADOOP_USER_CLASSPATH_FIRST="true"
export HADOOP_CLASSPATH="${SCALA_LIBS}:${HBASE_LIBS}"
#export HADOOP_CLASSPATH=lib/scala-library.jar:lib/BIDMat.jar:lib/jline-2.9.2.jar
# -libjars expects a comma-separated list; translate the ':' separators.
# Bash parameter expansion replaces the old unquoted `echo | sed`
# pipeline, which was subject to word splitting.
export LIB_JARS="${HADOOP_CLASSPATH//:/,}"
# if [ `uname` = "Darwin" ]; then
#   hadoop fs -put ${BIDMAT_ROOT}/lib/osx64/ osx64
#   export FILES="osx64/HDF5_Copyright.html,osx64/JCUDA5.0,osx64/JCUDA_Copyright.txt,osx64/libbidmatmkl.jnilib,osx64/libhdf4.settings,osx64/libhdf5.settings,osx64/libiomp5.dylib,osx64/libjhdf.jnilib,osx64/libjhdf5.jnilib"
# else
#   hadoop fs -put ${BIDMAT_ROOT}/lib/linux64 linux64
#   export FILES="linux64/HDF5_Copyright.html,linux64/JCUDA4.2,linux64/JCUDA5.0,linux64/JCUDA_Copyright.txt,linux64/libbidmatmkl.so,linux64/libhdf4.settings,linux64/libhdf5.settings,linux64/libiomp5.so,linux64/libjhdf5.so,linux64/libjhdf.so"
# fi
# hadoop jar runJar.jar -libjars ${LIB_JARS} -files ${FILES} "BIDMatExample" input output
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I followed almost the same as HelloWorld example.
I added two ways of producing output:
class MyMapper extends TableMapper[Text, Text] {
def map(row: ImmutableBytesWritable, value: Result, context: Context) {
context.write(new Text("WORLD"), new Text("HELLO"))
println("i am here")
System.out.println("i am here with java");
// do nothing
}
}
The job finished successfully, but I didn't find any output from MyMapper.
Could you please show me how can I make sure that MyMapper class was called from HelloWorld?
Hope to hear from you soon.
Thanks & Regards,
Michael
Log Type: stderr
Log Length: 1070
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/lib/zookeeper/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/appcache/application_1378797941378_0006/filecache/-5421579767982981155/job.jar/job.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/4183485615875643550/hadoop-3828057274528849787.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/-694838895948176220/hadoop-3992786908817592457.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/var/lib/hadoop-yarn/cache/yarn/nm-local-dir/usercache/root/filecache/8668426439724515383/hadoop-3900082634689490049.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
Log Type: stdout
Log Length: 514
java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:1961)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2038)
at java.util.concurrent.LinkedBlockingQueue.poll(LinkedBlockingQueue.java:424)
at org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator$1.run(DefaultSpeculator.java:189)
at java.lang.Thread.run(Thread.java:662)
Log Type: syslog
Log Length: 39444
Showing 4096 bytes of 39444 total. Click here for the full log.
Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl is stopped.
2013-09-10 15:10:34,254 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster$ContainerLauncherRouter is stopped.
2013-09-10 15:10:34,256 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: Setting job diagnostics to
2013-09-10 15:10:34,256 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: History url is localhost:19888/jobhistory/job/job_1378797941378_0006
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:RMCommunicator is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: Final Stats: PendingReds:0 ScheduledMaps:0 ScheduledReds:0 AssignedMaps:0 AssignedReds:0 CompletedMaps:1 CompletedReds:1 ContAlloc:2 ContRel:0 HostLocal:0 RackLocal:1
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster$ContainerAllocatorRouter is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator is stopped.
2013-09-10 15:10:34,261 INFO [Thread-62] org.apache.hadoop.ipc.Server: Stopping server on 41883
2013-09-10 15:10:34,261 ERROR [DefaultSpeculator background processing] org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator: Background thread returning, interrupted : java.lang.InterruptedException
2013-09-10 15:10:34,261 INFO [IPC Server listener on 41883] org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 41883
2013-09-10 15:10:34,262 INFO [IPC Server Responder] org.apache.hadoop.ipc.Server: Stopping IPC Server Responder
2013-09-10 15:10:34,263 INFO [Thread-62] org.mortbay.log: Stopped SelectChannelConnector@0.0.0.0:0
2013-09-10 15:10:34,363 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:MRClientService is stopped.
2013-09-10 15:10:34,364 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:CommitterEventHandler is stopped.
2013-09-10 15:10:34,364 INFO [Thread-62] org.apache.hadoop.ipc.Server: Stopping server on 49660
2013-09-10 15:10:34,365 INFO [IPC Server listener on 49660] org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 49660
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:TaskHeartbeatHandler is stopped.
2013-09-10 15:10:34,365 INFO [IPC Server Responder] org.apache.hadoop.ipc.Server: Stopping IPC Server Responder
2013-09-10 15:10:34,365 INFO [TaskHeartbeatHandler PingChecker] org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler: TaskHeartbeatHandler thread interrupted
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapred.TaskAttemptListenerImpl is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:Dispatcher is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.yarn.service.AbstractService: Service:org.apache.hadoop.mapreduce.v2.app.MRAppMaster is stopped.
2013-09-10 15:10:34,365 INFO [Thread-62] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Exiting MR AppMaster..GoodBye!
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: MRAppMaster received a signal. Signaling RMCommunicator and JobHistoryEventHandler.
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: RMCommunicator notified that iSignalled is: true
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator: RMCommunicator notified that shouldUnregistered is: true
2013-09-10 15:10:34,368 INFO [Thread-1] org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler: JobHistoryEventHandler notified that forceJobCompletion is true