Vince Gonzalez vicenteg

## getBigQuerySchema.sh
#!/bin/bash

# Require a table spec as the first positional argument.
# "$1" is quoted so the test stays a well-formed two-operand expression even
# when the argument is empty or contains whitespace (unquoted, a value such
# as "a b" makes `[` fail with "too many arguments" and the guard is skipped).
if [ -z "$1" ]; then
	# Usage errors belong on stderr so they never mix with the schema output.
	echo "Provide a BQ table spec, ideally fully qualified." >&2
	exit 1
fi


if $(echo "" | jq .); then
	bq --format json show $1 | jq -j '[.schema.fields[] | .name + ":" + .type] | join(",")'

## queries.sql
0: jdbc:drill:zk=localhost:2181> -- network performance with rpctest, node to node, round robin
0: jdbc:drill:zk=localhost:2181> select type,count(1) runs,avg(t.rateMBps) avgRateMBPS,avg(t.rpcspersecond) avgrpcspersecond from (select i.type, f.* from dfs.vr.instances_view i join dfs.vr.rpctest_1_1_view f on f.`time` = i.`time` and f.host = i.host) t group by type,`time` order by type ;
+--------------+-------+---------------------+---------------------+
|     type     | runs  |     avgRateMBPS     |  avgrpcspersecond   |
+--------------+-------+---------------------+---------------------+
| d2.2xlarge   | 12    | 273.3066660563151   | 2085.1624857584634  |
| d2.2xlarge   | 12    | 233.72499974568686  | 1783.183344523112   |
| d2.2xlarge   | 12    | 195.93666712443033  | 1494.8716837565105  |
| d2.2xlarge   | 12    | 239.15333557128906  | 1824.5950215657551  |
| d2.2xlarge   | 12    | 275.3549982706706   | 2100.788319905599   |

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                vicenteg
                / README.md
            
            
              Created
              September 27, 2016 21:53
            
          
    Install unixODBC-devel
yum -y install unixODBC-devel

Install python-virtualenv
yum -y install python-virtualenv


## zeppelin_mapr_streams.json
{
  "paragraphs": [
    {
      "text": "%md\n\nFirst thing you need to do to run this notebook is to make sure Zeppelin knows how to pull in MapR Streams maven dependencies.\n\nAdd this artifact to the Spark interpreter (update the version as needed):\n\n`org.apache.kafka:kafka-clients:0.9.0.0-mapr-1607`\n\nAlso make sure that the MapR repository is set up:\n\nhttp://repository.mapr.com/maven/\n\nFollow the [Zeppelin documentation for dependency management](http://zeppelin.apache.org/docs/0.6.1/manual/dependencymanagement.html) for instructions on to do this.\n",
      "dateUpdated": "2016-09-20T20:53:52-0700",
      "config": {
        "colWidth": 12,
        "graph": {
          "mode": "table",
          "height": 300,

## configure_mapr_core_site_xml_for_s3.yml
# Requires hadoop_properties: https://github.com/vicenteg/ansible-library
# Clone the repo to a library directory alongside this playbook
# e.g.,
# mkdir mapr_to_s3 && cd mapr_to_s3 &&\
#	curl -L 'https://gist.githubusercontent.com/vicenteg/1b110cfd467d64487a16385ec10bdb42/raw/f20770712d90696e817cb8725181dcbb5c146020/configure_mapr_core_site_xml_for_s3.yml' -o configure_mapr_core_site_xml_for_s3.yml &&\
#	git clone https://github.com/vicenteg/ansible-library.git library
#
# Add your access key and secret key.
#
# You may need to change the group to match whatever you've named the

## console_spinner.py
import itertools
import sys
import time  # required by time.sleep() below; it was previously missing

# Console spinner: loops forever over four glyphs, drawing each one at the
# start of the current line so they appear to rotate in place.
c = itertools.cycle(['|','/','-','\\'])
for i in c:
    sys.stdout.write(i)
    # Flush explicitly so the glyph is emitted now rather than whenever the
    # stream's buffer happens to be written out.
    sys.stdout.flush()
    time.sleep(.05)
    # Carriage return moves the cursor back to column 0 so the next glyph
    # overwrites this one instead of being appended after it.
    sys.stdout.write('\r')

## mapr-5.0.0-drill-1.2.0-cluster.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                vicenteg
                / mapr-5.0.0-drill-1.2.0-cluster.md
            
            
              Last active
              January 6, 2016 18:28
            
              
                repo manifest for MapR v5.0.0 with Drill 1.2.0
              
          
    A repo manifest file for a MapR cluster with Drill 1.2.0.

This Drill deployment can also work with Hive 1.2.0.
Quick Start

I assume you have successfully installed ansible (I use version 1.9) and have installed the dependencies for the ec2 modules. Specifically, you should install the boto python module (pip install boto) and awscli (pip install awscli). Then run aws configure to store your EC2 credentials.
Step 0


## benchmark-commands.sh
# Capture the output of `maprcli node listzookeepers` and the first hostname
# returned by the csvc==kafka node filter, then export both so that child
# processes can read them.
zookeepers=$(maprcli node listzookeepers -noheader)
bootstrap_servers=$(
  maprcli node list -columns hostname -noheader -filter csvc==kafka |
    awk 'NR == 1 { print $1 }'
)
export zookeepers bootstrap_servers

# Producer

# Setup
# NOTE(review): $zookeepers is intentionally left unquoted, as in the original
# commands; word splitting drops any padding maprcli may emit — confirm before
# quoting.
bin/kafka-topics.sh --zookeeper $zookeepers --create \
  --topic test-rep-one --partitions 6 --replication-factor 1
bin/kafka-topics.sh --zookeeper $zookeepers --create \
  --topic test --partitions 6 --replication-factor 3

# Single thread, no replication

## leave_cluster.sh
#!/bin/bash

if maprcli node list -columns id; then
        NODEID=$(maprcli node list -columns id -filter hostname==`hostname -f` -noheader | cut -f 1 -d ' ')
        NODEVOLUMES=$(maprcli volume list -columns volumename  | egrep "^mapr.`hostname -f`")

        for volume in $NODEVOLUMES; do
                maprcli volume remove -name $volume
        done

## env.sh
#!/bin/bash
# Copyright (c) 2009 & onwards. MapR Tech, Inc., All rights reserved
# Please set all environment variables you want to be used during MapR cluster
# runtime here.
# namely MAPR_HOME, JAVA_HOME, MAPR_SUBNETS

#set JAVA_HOME to override default search
#export JAVA_HOME=
export MAPR_SUBNETS=
#export MAPR_HOME=
	#!/bin/bash

	if [ -z $1 ]; then
	echo "Provide a BQ table spec, ideally fully qualified."
	exit 1
	fi


	if $(echo "" \| jq .); then
	bq --format json show $1 \| jq -j '[.schema.fields[] \| .name + ":" + .type] \| join(",")'
	0: jdbc:drill:zk=localhost:2181> -- network performance with rpctest, node to node, round robin
	0: jdbc:drill:zk=localhost:2181> select type,count(1) runs,avg(t.rateMBps) avgRateMBPS,avg(t.rpcspersecond) avgrpcspersecond from (select i.type, f.* from dfs.vr.instances_view i join dfs.vr.rpctest_1_1_view f on f.`time` = i.`time` and f.host = i.host) t group by type,`time` order by type ;
	+--------------+-------+---------------------+---------------------+
	\| type \| runs \| avgRateMBPS \| avgrpcspersecond \|
	+--------------+-------+---------------------+---------------------+
	\| d2.2xlarge \| 12 \| 273.3066660563151 \| 2085.1624857584634 \|
	\| d2.2xlarge \| 12 \| 233.72499974568686 \| 1783.183344523112 \|
	\| d2.2xlarge \| 12 \| 195.93666712443033 \| 1494.8716837565105 \|
	\| d2.2xlarge \| 12 \| 239.15333557128906 \| 1824.5950215657551 \|
	\| d2.2xlarge \| 12 \| 275.3549982706706 \| 2100.788319905599 \|
	{
	"paragraphs": [
	{
	"text": "%md\n\nFirst thing you need to do to run this notebook is to make sure Zeppelin knows how to pull in MapR Streams maven dependencies.\n\nAdd this artifact to the Spark interpreter (update the version as needed):\n\n`org.apache.kafka:kafka-clients:0.9.0.0-mapr-1607`\n\nAlso make sure that the MapR repository is set up:\n\nhttp://repository.mapr.com/maven/\n\nFollow the [Zeppelin documentation for dependency management](http://zeppelin.apache.org/docs/0.6.1/manual/dependencymanagement.html) for instructions on to do this.\n",
	"dateUpdated": "2016-09-20T20:53:52-0700",
	"config": {
	"colWidth": 12,
	"graph": {
	"mode": "table",
	"height": 300,
	# Requires hadoop_properties: https://github.com/vicenteg/ansible-library
	# Clone the repo to a library directory alongside this playbook
	# e.g.,
	# mkdir mapr_to_s3 && cd mapr_to_s3 &&\
	# curl -L 'https://gist.githubusercontent.com/vicenteg/1b110cfd467d64487a16385ec10bdb42/raw/f20770712d90696e817cb8725181dcbb5c146020/configure_mapr_core_site_xml_for_s3.yml' -o configure_mapr_core_site_xml_for_s3.yml &&\
	# git clone https://github.com/vicenteg/ansible-library.git library
	#
	# Add your access key and secret key.
	#
	# You may need to change the group to match whatever you've named the
	import itertools
	import sys

	c = itertools.cycle(['\|','/','-','\\'])
	for i in c:
	sys.stdout.write(i)
	sys.stdout.flush()
	time.sleep(.05)
	sys.stdout.write('\r')
	export zookeepers=$(maprcli node listzookeepers -noheader)
	export bootstrap_servers=$(maprcli node list -columns hostname -noheader -filter csvc==kafka \| awk '{ print $1 }' \| head -1)

	# Producer

	# Setup
	bin/kafka-topics.sh --zookeeper $zookeepers --create --topic test-rep-one --partitions 6 --replication-factor 1
	bin/kafka-topics.sh --zookeeper $zookeepers --create --topic test --partitions 6 --replication-factor 3

	# Single thread, no replication
	#!/bin/bash

	if maprcli node list -columns id; then
	NODEID=$(maprcli node list -columns id -filter hostname==`hostname -f` -noheader \| cut -f 1 -d ' ')
	NODEVOLUMES=$(maprcli volume list -columns volumename \| egrep "^mapr.`hostname -f`")

	for volume in $NODEVOLUMES; do
	maprcli volume remove -name $volume
	done
	#!/bin/bash
	# Copyright (c) 2009 & onwards. MapR Tech, Inc., All rights reserved
	# Please set all environment variables you want to be used during MapR cluster
	# runtime here.
	# namely MAPR_HOME, JAVA_HOME, MAPR_SUBNETS

	#set JAVA_HOME to override default search
	#export JAVA_HOME=
	export MAPR_SUBNETS=
	#export MAPR_HOME=