Miklos C (mrchristine)

@mrchristine
mrchristine / aws_spot_pricing.sh
Last active August 12, 2016 20:41
AWS Spot Pricing History
#!/bin/bash
# catch ctrl+c handler
trap ctrl_c_cleanup INT
function ctrl_c_cleanup() {
  echo "** Interrupt handler caught"
  rm -rf spot_prices_*.json
}
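
For context, the price history this script collects is also available through boto3's describe_spot_price_history call. Below is a minimal sketch; the region, instance type, and one-day window are assumptions for illustration, not values taken from the gist.

import boto3
from datetime import datetime, timedelta

# Assumed region and instance type for illustration
ec2 = boto3.client("ec2", region_name="us-west-2")
resp = ec2.describe_spot_price_history(
    InstanceTypes=["r3.xlarge"],
    ProductDescriptions=["Linux/UNIX"],
    StartTime=datetime.utcnow() - timedelta(days=1),
    EndTime=datetime.utcnow(),
)
# Note: results are paginated; follow resp["NextToken"] for the full history
for price in resp["SpotPriceHistory"]:
    print(price["AvailabilityZone"], price["SpotPrice"], price["Timestamp"])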
@mrchristine
mrchristine / dbc_reset_scheduled_jobs.sh
Created June 29, 2016 19:55
Databricks REST API to delete job schedules.
#!/bin/bash
# catch ctrl+c handler
trap ctrl_c_cleanup INT
function ctrl_c_cleanup() {
  echo "** Interrupt handler caught"
  rm -f "$job_file"
}
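
A rough Python sketch of what this script drives, using the Jobs 2.0 API's jobs/list and jobs/reset endpoints. The workspace URL and token are placeholders, not values from the gist.

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder workspace URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}

jobs = requests.get(HOST + "/api/2.0/jobs/list", headers=HEADERS).json().get("jobs", [])
for job in jobs:
    settings = job["settings"]
    if "schedule" in settings:
        settings.pop("schedule")  # drop the cron schedule, keep all other settings
        requests.post(HOST + "/api/2.0/jobs/reset", headers=HEADERS,
                      json={"job_id": job["job_id"], "new_settings": settings})

jobs/reset overwrites the full settings object, which is why the sketch re-sends everything minus the schedule rather than patching a single field.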
@mrchristine
mrchristine / dbc_deploy_cluster.sh
Created July 13, 2016 15:12
Deploy a cluster on Databricks using the REST API
#!/bin/bash
IFS=$'\n' # make newlines the only separator
while getopts ":o" opt; do
  case $opt in
    o)
      ondemand=true
      echo -e "Deploying on-demand cluster for mwc\n"
      ;;
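
The deployment itself goes through the clusters/create endpoint. A minimal sketch in Python follows; the cluster name and worker count are assumptions, and the Spark version and node type are borrowed from the spark-submit gists further down.

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder workspace URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}

payload = {
    "cluster_name": "mwc-demo",           # assumed name
    "spark_version": "3.2.x-scala2.11",   # version string used in the gists below
    "node_type_id": "r3.xlarge",
    "num_workers": 2,                     # assumed size
}
resp = requests.post(HOST + "/api/2.0/clusters/create", headers=HEADERS, json=payload)
print(resp.json().get("cluster_id"))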
@mrchristine
mrchristine / aws_assign_eip.sh
Created September 6, 2016 18:19
Assign an elastic ip with a bootstrap script.
#!/bin/bash
# Set params
k=YOUR_AWS_KEY
s=YOUR_AWS_SECRET
r=YOUR_REGION
# Assign EIP allocation ID
eip_id=eipalloc-XXXXXXX
# Install awscli
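
Where the gist goes on to install the AWS CLI, a boto3 sketch of the same association step looks like this. It assumes the IMDSv1 metadata endpoint is reachable from the instance; the region and allocation ID are the placeholders above.

import boto3
import requests

# The instance discovers its own ID via the EC2 metadata service (IMDSv1 shown)
instance_id = requests.get(
    "http://169.254.169.254/latest/meta-data/instance-id", timeout=2).text

ec2 = boto3.client("ec2", region_name="YOUR_REGION")  # placeholder region
ec2.associate_address(AllocationId="eipalloc-XXXXXXX", InstanceId=instance_id)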
@mrchristine
mrchristine / dbc_deploy_cluster_and_execute.sh
Created November 10, 2016 15:33
Databricks REST API to deploy an Apache Spark cluster and run a remote context to execute commands on the cluster.
#!/bin/bash
IFS=$'\n' # make newlines the only separator
while getopts ":p" opt; do
  case $opt in
    p)
      print_versions=true
      echo -e "Printing the supported Spark versions and node types\n"
      ;;
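
The -p listing maps to two read-only endpoints, clusters/spark-versions and clusters/list-node-types. A sketch with placeholder credentials:

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder workspace URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}

versions = requests.get(HOST + "/api/2.0/clusters/spark-versions", headers=HEADERS).json()
for v in versions.get("versions", []):
    print(v["key"], "->", v["name"])

node_types = requests.get(HOST + "/api/2.0/clusters/list-node-types", headers=HEADERS).json()
for nt in node_types.get("node_types", []):
    print(nt["node_type_id"], nt["memory_mb"], nt["num_cores"])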
@mrchristine
mrchristine / spark-submit-example-with-history.sh
Last active September 12, 2017 15:38
Databricks REST API spark-submit w/ run-now
#!/bin/bash
usage="Add jars to the input arguments to specify the spark job. -h list the supported spark versions"
RUNTIME_VERSION="3.2.x-scala2.11"
NODE_TYPE="r3.xlarge"
while getopts ':hs:' option; do
  case "$option" in
    h) echo "$usage"
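
The "with history" pattern the title refers to is: create a persistent job with a spark_submit_task, then trigger it with run-now so every invocation is recorded in the job's run history. A sketch; the class name, jar path, and cluster size are hypothetical.

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder workspace URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}

job_spec = {
    "name": "spark-submit-example",
    "new_cluster": {
        "spark_version": "3.2.x-scala2.11",
        "node_type_id": "r3.xlarge",
        "num_workers": 2,              # assumed size
    },
    "spark_submit_task": {
        # Hypothetical class and jar path for illustration
        "parameters": ["--class", "com.example.Main", "dbfs:/jars/my_job.jar"]
    },
}
job_id = requests.post(HOST + "/api/2.0/jobs/create",
                       headers=HEADERS, json=job_spec).json()["job_id"]
run = requests.post(HOST + "/api/2.0/jobs/run-now",
                    headers=HEADERS, json={"job_id": job_id}).json()
print("run_id:", run["run_id"])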
@mrchristine
mrchristine / spark-submit-run-once.sh
Created September 6, 2017 17:03
spark-submit transient run example
#!/bin/bash
usage="Add jars to the input arguments to specify the spark job. -h list the supported spark versions"
RUNTIME_VERSION="3.2.x-scala2.11"
NODE_TYPE="r3.xlarge"
while getopts ':hs:' option; do
  case "$option" in
    h) echo "$usage"
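
In contrast to the previous gist, a transient (run-once) submission goes through jobs/runs/submit, which spins up a cluster for a single run without creating a persistent job definition. A sketch with the same hypothetical class and jar path:

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder workspace URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}

run_spec = {
    "run_name": "spark-submit-run-once",
    "new_cluster": {
        "spark_version": "3.2.x-scala2.11",
        "node_type_id": "r3.xlarge",
        "num_workers": 2,              # assumed size
    },
    "spark_submit_task": {
        # Hypothetical class and jar path for illustration
        "parameters": ["--class", "com.example.Main", "dbfs:/jars/my_job.jar"]
    },
}
resp = requests.post(HOST + "/api/2.0/jobs/runs/submit", headers=HEADERS, json=run_spec)
print("run_id:", resp.json()["run_id"])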
@mrchristine
mrchristine / vector_sum_udaf.scala
Created November 29, 2017 21:39
Spark UDAF to sum vectors for common keys
package com.databricks.example.pivot
/**
 * This code allows a user to add vectors together for common keys.
 * The comments below show how to register the Scala UDAF so it can be
 * called from PySpark. The UDAF can only be invoked from a SQL
 * expression (i.e. spark.sql() or expr()).
 */
/**
# Python code to register a Scala UDAF
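
The preview cuts off before the registration snippet. On Spark 2.3+ the PySpark-side registration can look like the sketch below; the class name VectorSumUDAF is a guess from the package above, and spark/df are assumed to be an active session and a DataFrame with key and vec columns. The jar containing the Scala class must already be attached to the cluster.

# Register the compiled Scala UDAF under a SQL-callable name
spark.udf.registerJavaUDAF("vector_sum", "com.databricks.example.pivot.VectorSumUDAF")

df.createOrReplaceTempView("vectors")
result = spark.sql("SELECT key, vector_sum(vec) AS vec_sum FROM vectors GROUP BY key")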
@mrchristine
mrchristine / update_legacy_job_templates.py
Last active November 6, 2018 16:59
Job to update legacy instance types on Databricks
import json, pprint, requests, datetime
################################################################
## Replace the token variable and environment url below
################################################################
# Helper to pretty print json
def pprint_j(i):
    print(json.dumps(i, indent=4, sort_keys=True))
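
The core loop such a job performs is: list every job, swap the legacy node type in its cluster spec, and write the settings back with jobs/reset. A sketch; the URL, token, and old-to-new mapping are assumptions, not values from the gist.

import requests

HOST = "https://myshard.cloud.databricks.com"  # placeholder environment URL
HEADERS = {"Authorization": "Bearer <YOUR_API_TOKEN>"}
LEGACY_MAP = {"r3.xlarge": "r4.xlarge"}        # assumed old-to-new mapping

for job in requests.get(HOST + "/api/2.0/jobs/list", headers=HEADERS).json().get("jobs", []):
    settings = job["settings"]
    cluster = settings.get("new_cluster", {})
    if cluster.get("node_type_id") in LEGACY_MAP:
        cluster["node_type_id"] = LEGACY_MAP[cluster["node_type_id"]]
        requests.post(HOST + "/api/2.0/jobs/reset", headers=HEADERS,
                      json={"job_id": job["job_id"], "new_settings": settings})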
@mrchristine
mrchristine / spark_schema_save_n_load.py
Created May 28, 2019 21:12
Read / Write Spark Schema to JSON
##### READ SPARK DATAFRAME
df = spark.read.option("header", "true").option("inferSchema", "true").csv(fname)
# capture the schema inferred from the CSV (column names from the header, types inferred)
df_schema = df.schema
##### SAVE JSON SCHEMA INTO S3 / BLOB STORAGE
# save the schema to load from the streaming job, which we will load during the next job
dbutils.fs.rm("/home/mwc/airline_schema.json", True)
with open("/dbfs/home/mwc/airline_schema.json", "w") as f:
    f.write(df_schema.json())  # StructType.json() serializes the schema as a JSON string
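
To complete the round trip, the streaming job can rebuild the schema with StructType.fromJson and pass it to the read, avoiding a second inference pass. A sketch; the source path is a placeholder.

import json
from pyspark.sql.types import StructType

# Load the schema saved above and reuse it for the streaming read
with open("/dbfs/home/mwc/airline_schema.json") as f:
    schema = StructType.fromJson(json.load(f))

stream_df = (spark.readStream
             .schema(schema)
             .option("header", "true")
             .csv("/path/to/landing/dir"))  # placeholder source path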