#### ENV HDP-2.6.0.3-8

Download the spark-llap assembly jar from http://repo.hortonworks.com/content/repositories/releases/com/hortonworks/spark-llap/

#### Add the following to Custom spark-thrift-sparkconf

```
spark_thrift_cmd_opts --jars /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.executor.extraClassPath /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.hadoop.hive.llap.daemon.service.hosts @llap0
spark.jars /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.sql.hive.hiveserver2.url jdbc:hive2://hostname1.hwxblr.com:10500/;principal=hive/_HOST@EXAMPLE.COM;hive.server2.proxy.user=${user}
spark.hadoop.hive.zookeeper.quorum hostname1.hwxblr.com:2181
```

#### Add the following to Custom spark-defaults

```
spark.sql.hive.hiveserver2.url jdbc:hive2://hostname1.hwxblr.com:10500/;principal=hive/_HOST@EXAMPLE.COM
spark.jars /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
spark.hadoop.hive.zookeeper.quorum hostname1.hwxblr.com:2181
spark.hadoop.hive.llap.daemon.service.hosts @llap0
spark.executor.extraClassPath /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar
```

#### Start the Thrift server from Ambari and run a query as follows

```
beeline -u "jdbc:hive2://hostname3.hwxblr.com:10015/;principal=hive/_HOST@EXAMPLE.COM" -e "select * from test;"
```

#### If your query fails with the following exception, check that the spark-llap assembly jar is available on the executors' classpath (revisit spark.executor.extraClassPath)

```
Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, hostname1.hwxblr.com): java.lang.NullPointerException
  at org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy.<init>(LlapProtocolClientProxy.java:94)
  at org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient.<init>(LlapTaskUmbilicalExternalClient.java:119)
  at org.apache.hadoop.hive.llap.LlapBaseInputFormat.getRecordReader(LlapBaseInputFormat.java:143)
  at org.apache.hadoop.hive.llap.LlapRowInputFormat.getRecordReader(LlapRowInputFormat.java:51)
  at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:240)
  at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:211)
  at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
  at org.apache.spark.rdd.HadoopRDD$HadoopMapPartitionsWithSplitRDD.compute(HadoopRDD.scala:388)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
  at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
  at org.apache.spark.scheduler.Task.run(Task.scala:89)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:745)
```
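As a quick sanity check for this failure, you can confirm the assembly jar actually exists at the configured path on each node. This is a minimal sketch, assuming passwordless SSH from the node you run it on; extend the host list to cover every node that runs executors:

```
# Sketch: confirm the spark-llap assembly jar is present at the path
# referenced by spark.executor.extraClassPath on each worker node.
# (Host list is an example; substitute your actual executor hosts.)
for host in hostname1.hwxblr.com hostname3.hwxblr.com; do
  ssh "$host" 'ls -l /usr/hdp/current/spark-client/lib/spark-llap-1.0.0.2.6.0.3-8-assembly.jar'
done
```

If the jar is missing on any host, copy it there and restart the Spark Thrift Server from Ambari before re-running the query.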