#### OS centos7
#### REALM EXAMPLE.COM (update accordingly)
#### AS and KDC are running on hostname rks253secure.hdp.local (update accordingly)
yum install -y krb5-server krb5-workstation pam_krb5
cd /var/kerberos/krb5kdc
nimbus.autocredential.plugins.classes ["org.apache.storm.hdfs.common.security.AutoHDFS"]
nimbus.credential.renewers.classes ["org.apache.storm.hdfs.common.security.AutoHDFS"]
hdfs.keytab.file /etc/security/keytabs/hdfs.headless.keytab
hdfs.kerberos.principal hdfs-s253_kerb@LAB.HORTONWORKS.NET
nimbus.credential.renewers.freq.secs 518400
nimbus.childopts -Xmx1024m _JAAS_PLACEHOLDER -javaagent:/usr/hdp/current/storm-nimbus/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=localhost,port=8649,wireformat31x=true,mode=multicast,config=/usr/hdp/current/storm-nimbus/contrib/storm-jmxetric/conf/jmxetric-conf.xml:/etc/hadoop/conf/hdfs-site.xml:/etc/hadoop/conf/core-site.xml:/etc/hbase/conf/hbase-site.xml,process=Nimbus_JVM
hadoop fs -ls /tmp/sparkOozieShellAction/
Found 4 items
-rw-r--r-- 3 oozie hdfs 178 2017-05-08 07:00 /tmp/sparkOozieShellAction/job.properties
drwxr-xr-x - oozie hdfs 0 2017-05-08 07:01 /tmp/sparkOozieShellAction/lib
-rw-r--r-- 3 oozie hdfs 279 2017-05-08 07:12 /tmp/sparkOozieShellAction/spark-pi-job.sh
-rw-r--r-- 3 oozie hdfs 712 2017-05-08 07:34 /tmp/sparkOozieShellAction/workflow.xml
[oozie@rk253 ~]$ hadoop fs -lsr /tmp/sparkOozieAction
lsr: DEPRECATED: Please use 'ls -R' instead.
-rwxrwxrwx 3 oozie hdfs 167 2017-05-08 05:01 /tmp/sparkOozieAction/job.properties
drwxrwxrwx - oozie hdfs 0 2017-05-08 05:04 /tmp/sparkOozieAction/lib
-rwxrwxrwx 3 oozie hdfs 110488188 2017-05-08 04:58 /tmp/sparkOozieAction/lib/spark-examples-1.6.2.2.5.3.0-37-hadoop2.7.3.2.5.3.0-37.jar
-rw-r--r-- 3 oozie hdfs 1571 2017-05-08 05:46 /tmp/sparkOozieAction/workflow.xml
split generation in tez
2017-02-16 15:56:48,725 [INFO] [InputInitializer {Map 1} #0] |dag.RootInputInitializerManager|: Starting InputInitializer for Input: sample_07 on vertex vertex_1486830296338_0025_1_00 [Map 1]
invoke org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator#initialize
2017-02-16 15:56:48,729 [INFO] [InputInitializer {Map 1} #0] |tez.HiveSplitGenerator|: InputInitializer {Map 1} #0 | initialize realInputFormatName : org.apache.hadoop.hive.ql.io.HiveInputFormat
2017-02-16 15:56:48,738 [INFO] [InputInitializer {Map 1} #0] |tez.HiveSplitGenerator|: InputInitializer {Map 1} #0 | initialize inputFormat org.apache.hadoop.hive.ql.io.HiveInputFormat@293c29b7
yum install -y python-pip
pip install kafka-python
# kafka producer sample code
vim kafka_producer.py
from kafka import KafkaProducer
from kafka.errors import KafkaError
producer = KafkaProducer(bootstrap_servers=['rkk1.hdp.local:6667'])
topic = "kafkatopic"
mkdir kafkaproducerscala
cd kafkaproducerscala/
mkdir -p src/main/scala
cd src/main/scala
vim KafkaProducerScala.scala
object KafkaProducerScala extends App {
import java.util.Properties
// Hive Tables
hive> select * from customer;
OK
1 Ramesh 32 Ahmedabad 000
2 Khilan 25 Delhi 1500
3 kaushik 23 Kota 2000
4 Chaitali 25 Mumbai 6500
5 Hardik 27 Bhopal 8500
6 Komal 22 MP 4500
Time taken: 0.568 seconds, Fetched: 6 row(s)
import com.google.common.io.Resources;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
End-to-end Latency
0.0543 ms (median)
0.003125 ms (99th percentile)
5 ms (99.9th percentile)
Producer and consumer
Producer - 1431170.2 records/sec (136.49 MB/sec)
Consumer - 3276754.7021 records/sec (312.4957 MB/sec)