Rajkumar Singh rajkrrsingh

## SparkDFJoinUsingBroadcast
// Hive Tables
hive> select * from customer;
OK
1	Ramesh	32	Ahmedabad	000
2	Khilan	25	Delhi	1500
3	kaushik	23	Kota	2000
4	Chaitali	25	Mumbai	6500
5	Hardik	27	Bhopal	8500
6	Komal	22	MP	4500
Time taken: 0.568 seconds, Fetched: 6 row(s)

## kafkaproducerscala
mkdir kafkaproducerscala
cd kafkaproducerscala/
mkdir -p src/main/scala
cd src/main/scala
vim KafkaProducerScala.scala

object KafkaProducerScala extends App {

       import java.util.Properties


## kafka-python-producer
yum install -y python-pip
pip install kafka-python

//kafka producer sample code
vim kafka_producer.py
from kafka import KafkaProducer
from kafka.errors import KafkaError

producer = KafkaProducer(bootstrap_servers=['rkk1.hdp.local:6667'])
topic = "kafkatopic"

## Tez-Split-Calculation.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rajkrrsingh
                / Tez-Split-Calculation.md
            
            
              Last active
              January 31, 2018 17:48
            
              
                how Tez initial parallelism works (split calculation)
              
          
    split generation in tez
2017-02-16 15:56:48,725 [INFO] [InputInitializer {Map 1} #0] |dag.RootInputInitializerManager|: Starting InputInitializer for Input: sample_07 on vertex vertex_1486830296338_0025_1_00 [Map 1]

invoke
org.apache.hadoop.hive.ql.exec.tez.HiveSplitGenerator#initialize
2017-02-16 15:56:48,729 [INFO] [InputInitializer {Map 1} #0] |tez.HiveSplitGenerator|: InputInitializer {Map 1} #0 | initialize realInputFormatName : org.apache.hadoop.hive.ql.io.HiveInputFormat

2017-02-16 15:56:48,738 [INFO] [InputInitializer {Map 1} #0] |tez.HiveSplitGenerator|: InputInitializer {Map 1} #0 | initialize inputFormat org.apache.hadoop.hive.ql.io.HiveInputFormat@293c29b7


## oozie_spark_action_example.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                rajkrrsingh
                / oozie_spark_action_example.md
            
            
              Last active
              April 24, 2019 14:39
            
              
                oozie spark action example
              
          
    directory structure at hdfs

[oozie@rk253 ~]$ hadoop fs -lsr /tmp/sparkOozieAction
lsr: DEPRECATED: Please use 'ls -R' instead.
-rwxrwxrwx   3 oozie hdfs        167 2017-05-08 05:01 /tmp/sparkOozieAction/job.properties
drwxrwxrwx   - oozie hdfs          0 2017-05-08 05:04 /tmp/sparkOozieAction/lib
-rwxrwxrwx   3 oozie hdfs  110488188 2017-05-08 04:58 /tmp/sparkOozieAction/lib/spark-examples-1.6.2.2.5.3.0-37-hadoop2.7.3.2.5.3.0-37.jar
-rw-r--r--   3 oozie hdfs       1571 2017-05-08 05:46 /tmp/sparkOozieAction/workflow.xml


## oozie_spark_shell_action.md

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                rajkrrsingh
                / oozie_spark_shell_action.md
            
            
              Created
              May 8, 2017 08:39
            
              
                oozie spark shell action
              
          
    workflow dir @hdfs

 hadoop fs -ls /tmp/sparkOozieShellAction/
Found 4 items
-rw-r--r--   3 oozie hdfs        178 2017-05-08 07:00 /tmp/sparkOozieShellAction/job.properties
drwxr-xr-x   - oozie hdfs          0 2017-05-08 07:01 /tmp/sparkOozieShellAction/lib
-rw-r--r--   3 oozie hdfs        279 2017-05-08 07:12 /tmp/sparkOozieShellAction/spark-pi-job.sh
-rw-r--r--   3 oozie hdfs        712 2017-05-08 07:34 /tmp/sparkOozieShellAction/workflow.xml


## Storm_AutoHDFS_configuration.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rajkrrsingh
                / Storm_AutoHDFS_configuration.md
            
            
              Last active
              May 18, 2017 12:07
            
              
                steps to configure Storm AutoHDFS.
              
          
    Add these configurations to the custom storm-site.

nimbus.autocredential.plugins.classes ["org.apache.storm.hdfs.common.security.AutoHDFS"]
nimbus.credential.renewers.classes ["org.apache.storm.hdfs.common.security.AutoHDFS"]
hdfs.keytab.file  /etc/security/keytabs/hdfs.headless.keytab
hdfs.kerberos.principal hdfs-s253_kerb@LAB.HORTONWORKS.NET
nimbus.credential.renewers.freq.secs 518400

nimbus.childopts -Xmx1024m _JAAS_PLACEHOLDER -javaagent:/usr/hdp/current/storm-nimbus/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=localhost,port=8649,wireformat31x=true,mode=multicast,config=/usr/hdp/current/storm-nimbus/contrib/storm-jmxetric/conf/jmxetric-conf.xml:/etc/hadoop/conf/hdfs-site.xml:/etc/hadoop/conf/core-site.xml:/etc/hbase/conf/hbase-site.xml,process=Nimbus_JVM


## kerberos_installation_on_hdp_centos7.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rajkrrsingh
                / kerberos_installation_on_hdp_centos7.md
            
            
              Last active
              May 21, 2017 14:25
            
              
                sample steps to set up a KDC before enabling Kerberos through Ambari on a Hortonworks cluster
              
          
    ENV

#### OS centos7
#### REALM EXAMPLE.COM (update accordingly)
#### AS and KDC are running on hostname rks253secure.hdp.local (update accordingly)

install required packages

yum install -y krb5-server krb5-workstation pam_krb5
cd  /var/kerberos/krb5kdc


## Spark LLAP Setup for Thrift server.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                rajkrrsingh
                / Spark LLAP Setup for Thrift server.md
            
            
              Last active
              May 27, 2017 15:51
            
              
                configuration required to set up Spark-LLAP
              
          
    ENV HDP-2.6.0.3-8

Download spark-llap assembly jar from http://repo.hortonworks.com/content/repositories/releases/com/hortonworks/spark-llap/
Add the following to Custom spark-thrift-sparkconf

spark_thrift_cmd_opts --jars /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.executor.extraClassPath /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.hadoop.hive.llap.daemon.service.hosts @llap0
spark.jars /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar


## SparkKafkaIntegration.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                rajkrrsingh
                / SparkKafkaIntegration.md
            
            
              Last active
              December 18, 2019 09:25
            
              
                Spark Kafka Consumer in a secure (Kerberos) environment
              
          
    Sample Application

using direct stream

 import kafka.serializer.StringDecoder;
 import org.apache.spark.SparkConf
 import org.apache.spark.streaming._
 import org.apache.spark.streaming.kafka._
 
 
 object SparkKafkaConsumer2 {
	// Hive Tables
	hive> select * from customer;
	OK
	1 Ramesh 32 Ahmedabad 000
	2 Khilan 25 Delhi 1500
	3 kaushik 23 Kota 2000
	4 Chaitali 25 Mumbai 6500
	5 Hardik 27 Bhopal 8500
	6 Komal 22 MP 4500
	Time taken: 0.568 seconds, Fetched: 6 row(s)
	mkdir kafkaproducerscala
	cd kafkaproducerscala/
	mkdir -p src/main/scala
	cd src/main/scala
	vim KafkaProducerScala.scala

	object KafkaProducerScala extends App {

	import java.util.Properties
	yum install -y python-pip
	pip install kafka-python

	//kafka producer sample code
	vim kafka_producer.py
	from kafka import KafkaProducer
	from kafka.errors import KafkaError

	producer = KafkaProducer(bootstrap_servers=['rkk1.hdp.local:6667'])
	topic = "kafkatopic"