#!/usr/bin/bash
# Environment for running Shark 0.9.0 against Spark 0.9.1 on a MapR node
export SHARK_HOME=/opt/mapr/shark/shark-0.9.0
export SPARK_HOME=/opt/mapr/spark/spark-0.9.1
export SCALA_HOME=/usr/share/java
export CLASSPATH
val records = ssc.socketTextStream(host, port.toInt, StorageLevel.MEMORY_ONLY_SER)
// Basically: for each RDD inside the DStream, perform a 'collect' on the RDD, which
// creates an array, then run a foreach on the elements within the array.
// Maybe there's a more 'sparky' way of doing this...
records.foreach(rdd => {
  val rddarray = rdd.collect
  if (rddarray.length > 0) {
    var linecount = 0
    // ... per-line processing over rddarray ...
  }
})
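
On the 'sparky' question in the comment above: the collect can be avoided entirely by keeping the work on the executors. A minimal sketch, assuming the same records DStream as above (in Spark 0.9, foreachRDD replaced the now-deprecated DStream.foreach):

// Sketch: operate on each RDD directly instead of collecting it to the driver.
records.foreachRDD { rdd =>
  val linecount = rdd.count() // distributed count, replaces rdd.collect.length
  if (linecount > 0) {
    // per-record work runs on the executors, not the driver
    rdd.foreach(line => println(line))
  }
}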
##Running the demo
###Introduction
- Show a slide with a diagram of the components
- Show a slide detailing the clickstream use case
- Talk about how the data arrives in different formats, and how we want to query it as-is, without any up-front manipulation
###Dataset intro
#Drill null pointer exception error
10:29:41.443 [2634939c-333e-4d3f-8caf-31389859a336:frag:0:0] DEBUG o.a.d.e.w.fragment.FragmentExecutor - Caught exception while running fragment
java.lang.NullPointerException: null
at org.apache.hadoop.hive.ql.metadata.Table.getFields(Table.java:403) ~[storage-hive-1.0.0-m2-incubating-SNAPSHOT.jar:1.0.0-m2-incubating-SNAPSHOT]
at org.apache.drill.exec.store.hive.schema.DrillHiveTable.getRowType(DrillHiveTable.java:52) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
at org.apache.drill.exec.store.ischema.OptiqProvider$OptiqScanner.scanSchema(OptiqProvider.java:233) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
at org.apache.drill.exec.store.ischema.OptiqProvider$OptiqScanner.scanSchema(OptiqProvider.java:218) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
at org.apache.drill.exec.store.ischema.OptiqProvider$OptiqScanner.scanSchema(OptiqProvider.java:218) ~[drill-java-exec-1.0.0
// package drilljdbctest;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class DrillJDBCTest {
    public static void main(String[] args) throws SQLException {
        // zk=local targets an embedded Drillbit (point zk= at a ZooKeeper quorum for a
        // cluster); the sample query uses Drill's bundled classpath data -- substitute your own.
        try (Connection conn = DriverManager.getConnection("jdbc:drill:zk=local");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("select * from cp.`employee.json` limit 5")) {
            while (rs.next()) System.out.println(rs.getString(1));
        }
    }
}
#Twitter drill demo
Install Flume:
yum install -y mapr-flume
cd /opt/mapr/flume/flume-1.5.0/conf
cp flume-env.sh.template flume-env.sh
Then, once flume.conf in this directory defines the source, channel, and sink, start the agent (the agent name and file name here are examples):
flume-ng agent -n agent1 -c . -f flume.conf
[root@sandbox conf.empty]# hadoop fs -ls wasb://andypcontainer@maprazuretest.blob.core.windows.net/
15/03/23 21:23:25 DEBUG lib.MutableMetricsFactory: field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(value=[Rate of successful kerberos logins and latency (milliseconds)], about=, valueName=Time, type=DEFAULT, always=false, sampleName=Ops)
15/03/23 21:23:25 DEBUG lib.MutableMetricsFactory: field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(value=[Rate of failed kerberos logins and latency (milliseconds)], about=, valueName=Time, type=DEFAULT, always=false, sampleName=Ops)
15/03/23 21:23:25 DEBUG lib.MutableMetricsFactory: field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.getGroups with annotati
#Parquet error (positions)
0: jdbc:drill:> select cast(rptng_ts as varchar(20)) as cs from `data/file1.parquet` limit 1;
Error: exception while executing query: Failure while executing query. (state=,code=0)
Query failed: RemoteRpcException: Failure while running fragment., java.lang.UnsupportedOperationException [ 5380c240-5017-47da-8fb6-f870254056b1 on maprdemo:31010 ]
(org.apache.drill.common.exceptions.ExecutionSetupException) java.lang.UnsupportedOperationException
org.apache.drill.exec.store.parquet.columnreaders.ParquetRecordReader.setup():325
org.apache.drill.exec.physical.impl.ScanBatch.<init>():99
org.apache.drill.exec.store.parquet.ParquetScanBatchCreator.getBatch():156
org.apache.drill.exec.store.parquet.ParquetScanBatchCreator.getBatch():56
org.apache.drill.exec.physical.impl.ImplCreator.visitOp():62
org.apache.drill.exec.physical.impl.ImplCreator.visitOp():39
GATHERING FACTS ***************************************************************
failed: [localhost] => {"failed": true, "parsed": false}
[sudo via ansible, key=kvqiqrbdonminvyborgwewvidpmouxrb] password:
TASK: [remove fetched maprserverticket, ssl_keystore, and ssl_truststore] *****
failed: [localhost] => (item=maprserverticket) => {"failed": true, "item": "maprserverticket", "parsed": false}
[sudo via ansible, key=agayylrmdslfygktiyroczxheafboboc] password:
failed: [localhost] => (item=ssl_keystore) => {"failed": true, "item": "ssl_keystore", "parsed": false}
0: jdbc:drill:> explain plan for select prdct_id_val from posA18 where r_date >= '2014-01-01' and r_date <= '2014-12-31' group by prdct_id_val order by prdct_id_val LIMIT 50;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 SelectionVectorRemover
00-02 Limit(fetch=[50])
00-03 SingleMergeExchange(sort0=[0 ASC])
01-01 SelectionVectorRemover
01-02 TopN(limit=[50])