Venkata Gowri Sai Rakesh Kumar Varanasi vvgsrk

## create_UDF_and_UDA_in_Cassandra.cqlsh
cqlsh:test_ks>

-- Employee/department rows: emp_id is the partition key and
-- created_time (a timeuuid) is the clustering column.
CREATE TABLE emp_dept_info (
    emp_id       int,
    emp_name     text,
    dept_id      int,
    created_time timeuuid,
    PRIMARY KEY ((emp_id), created_time)
);

# Insert data in emp_dept_info Table


## glue-endpoint-creation-with-aws-cli.cli
# Run the commands below with the AWS CLI to create a Glue development endpoint.
# Load the public key text from disk (replace the placeholder path with your key file)
$GLUE_DEV_ENDPOINT_PUBLIC_KEY = Get-Content -Path 'Please_Put_Your_Public_Key_Path'

# Create the development endpoint, passing the IAM role ARN and the public key
aws glue create-dev-endpoint `
    --endpoint-name any-meaningful-name `
    --role-arn arn:aws:iam::000000000000:role/intended_role `
    --public-key $GLUE_DEV_ENDPOINT_PUBLIC_KEY

# Query the endpoint to check its status
aws glue get-dev-endpoint --endpoint-name any-meaningful-name

# To Delete the endpoint

## pre-requisites-before-starting-spark-shell-on-glue-dev-endpoint.properties
# Properties File : Create a properties file with the following configurations and name it as glue_spark_shell.properties

# Note: In the configurations below, replace the S3 access and secret keys with your own keys.

spark.hadoop.fs.s3a.impl        	  org.apache.hadoop.fs.s3a.S3AFileSystem
spark.driver.extraClassPath     	  /usr/share/aws/glue/etl/jars/*:/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/hmclient/lib/*:/usr/share/java/Hive-JSON-Serde/*:/usr/share/aws/sagemaker-spark-sdk/lib/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/glue/etl/python/PyGlue.zip:/usr/share/aws/emr/emrfs/auxlib/*:/usr/lib/hadoop/lib/native/*:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/glue/etl/conf
spark.executor.extraClassPath       /usr/share/aws/glue/etl/jars/*:/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/hmclient/lib/*:/usr/share/java/Hive-JSON-Serd

## glue-spark-shell-scala-commands.scala
// Invoke Spark Shell
$ glue-spark-shell -v --properties-file /home/glue/glue_spark_shell.properties --packages com.databricks:spark-avro_2.11:4.0.0

// Import Required Classes
import org.apache.spark.SparkContext
import com.amazonaws.services.glue.GlueContext
import com.amazonaws.services.glue.DynamicFrame
import com.amazonaws.services.glue.DynamicRecord
import com.amazonaws.services.glue.MappingSpec
import com.amazonaws.services.glue.errors.CallSite

## gluepyspark-python-commands.py
# Invoke Spark Shell
$ gluepyspark -v --properties-file /home/glue/glue_spark_shell.properties --packages com.databricks:spark-avro_2.11:4.0.0

# Import required classes
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame, DynamicFrameReader, DynamicFrameWriter, DynamicFrameCollection

## TestSnowflakeConnection.scala
package com.vvgsrk.data

import org.apache.spark.sql.SparkSession
import net.snowflake.spark.snowflake.Utils.SNOWFLAKE_SOURCE_NAME

/** This object tests the "Snowflake on AWS" connection using Spark
 *  from Eclipse on a Windows PC.
 *
 * It uses Hadoop 2.7, Spark 2.3.2
 *

## One_Hour_Interval_Average_Load.sql
-- Hourly average warehouse load for one calendar day.
-- Before running, replace the two date bounds and the warehouse name placeholder.
SELECT DATE_TRUNC('hour', start_time) AS start_time_trunced_at_hour,
       HOUR(start_time)               AS start_time_hour,
       AVG(avg_running)               AS avg_running,
       AVG(avg_queued_load)           AS avg_queued_load,
       AVG(avg_queued_provisioning)   AS avg_queued_provisioning,
       AVG(avg_blocked)               AS avg_blocked
  FROM snowflake.account_usage.warehouse_load_history
 -- Half-open range on the raw column selects the same day as
 -- DATE_TRUNC('DAY', start_time) = '2020-03-17' but keeps the predicate
 -- free of a function call on start_time, so it can be used for pruning.
 WHERE start_time >= '2020-03-17'::TIMESTAMP_LTZ
   AND start_time <  '2020-03-18'::TIMESTAMP_LTZ
   AND warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'
 GROUP BY start_time_trunced_at_hour, start_time_hour
 -- Explicit ordering so the hours come back chronologically on every run.
 ORDER BY start_time_trunced_at_hour;

## alter_warehouse_max_cluster.sql
-- Set the warehouse's maximum cluster count to 10
-- (replace put_your_warehouse_name with the target warehouse).
ALTER WAREHOUSE put_your_warehouse_name
    SET MAX_CLUSTER_COUNT = 10;

## five_minute_interval_average_load.sql
SELECT DATE_TRUNC('hour', start_time) start_time_trunced_at_hour,
       HOUR(start_time)               start_time_hour,
       MINUTE(start_time)             start_time_min,
       AVG(avg_running)               avg_running,
       AVG(avg_queued_load)           avg_queued_load,
       AVG(avg_queued_provisioning)   avg_queued_provisioning,
       AVG(avg_blocked)               avg_blocked
 FROM snowflake.account_usage.warehouse_load_history
WHERE DATE_TRUNC('day', start_time) = '2020-03-26'
  AND warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'

## one_hour_interval_average_load_with_credits.sql
WITH wlh
     AS (SELECT DATE_TRUNC('hour', wl.start_time) start_time_trunced_at_hour,
                HOUR(wl.start_time)               start_time_hour,
                AVG(avg_running)                  avg_running,
                AVG(avg_queued_load)              avg_queued_load,
                AVG(avg_queued_provisioning)      avg_queued_provisioning,
                AVG(avg_blocked)                  avg_blocked
          FROM snowflake.account_usage.warehouse_load_history wl
         WHERE DATE_TRUNC('day', wl.start_time) = '2020-03-26'
           AND wl.warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'
	cqlsh:test_ks>

	-- Employee/department rows: emp_id is the partition key and
	-- created_time (a timeuuid) is the clustering column.
	CREATE TABLE emp_dept_info (
	    emp_id       int,
	    emp_name     text,
	    dept_id      int,
	    created_time timeuuid,
	    PRIMARY KEY ((emp_id), created_time)
	);

	# Insert data in emp_dept_info Table
	# Run the commands below with the AWS CLI to create a Glue development endpoint.
	# Load the public key text from disk (replace the placeholder path with your key file)
	$GLUE_DEV_ENDPOINT_PUBLIC_KEY = Get-Content -Path 'Please_Put_Your_Public_Key_Path'

	# Create the development endpoint, passing the IAM role ARN and the public key
	aws glue create-dev-endpoint `
	    --endpoint-name any-meaningful-name `
	    --role-arn arn:aws:iam::000000000000:role/intended_role `
	    --public-key $GLUE_DEV_ENDPOINT_PUBLIC_KEY

	# Query the endpoint to check its status
	aws glue get-dev-endpoint --endpoint-name any-meaningful-name

	# To Delete the endpoint
	# Properties File : Create a properties file with the following configurations and name it as glue_spark_shell.properties

	# Note: In the configurations below, replace the S3 access and secret keys with your own keys.

	spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem
	spark.driver.extraClassPath /usr/share/aws/glue/etl/jars/:/usr/lib/hadoop-lzo/lib/:/usr/lib/hadoop/:/usr/share/aws/aws-java-sdk/:/usr/share/aws/emr/emrfs/lib/:/usr/share/aws/hmclient/lib/:/usr/share/java/Hive-JSON-Serde/:/usr/share/aws/sagemaker-spark-sdk/lib/:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/glue/etl/python/PyGlue.zip:/usr/share/aws/emr/emrfs/auxlib/:/usr/lib/hadoop/lib/native/:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/glue/etl/conf
	spark.executor.extraClassPath /usr/share/aws/glue/etl/jars/:/usr/lib/hadoop-lzo/lib/:/usr/lib/hadoop/:/usr/share/aws/aws-java-sdk/:/usr/share/aws/emr/emrfs/lib/:/usr/share/aws/hmclient/lib/:/usr/share/java/Hive-JSON-Serd
	// Invoke Spark Shell
	$ glue-spark-shell -v --properties-file /home/glue/glue_spark_shell.properties --packages com.databricks:spark-avro_2.11:4.0.0

	// Import Required Classes
	import org.apache.spark.SparkContext
	import com.amazonaws.services.glue.GlueContext
	import com.amazonaws.services.glue.DynamicFrame
	import com.amazonaws.services.glue.DynamicRecord
	import com.amazonaws.services.glue.MappingSpec
	import com.amazonaws.services.glue.errors.CallSite
	# Invoke Spark Shell
	$ gluepyspark -v --properties-file /home/glue/glue_spark_shell.properties --packages com.databricks:spark-avro_2.11:4.0.0

	# Import required classes
	import sys
	from awsglue.transforms import *
	from awsglue.utils import getResolvedOptions
	from pyspark.context import SparkContext
	from awsglue.context import GlueContext
	from awsglue.dynamicframe import DynamicFrame, DynamicFrameReader, DynamicFrameWriter, DynamicFrameCollection
	package com.vvgsrk.data

	import org.apache.spark.sql.SparkSession
	import net.snowflake.spark.snowflake.Utils.SNOWFLAKE_SOURCE_NAME

	/** This object tests the "Snowflake on AWS" connection using Spark
	* from Eclipse on a Windows PC.
	*
	* It uses Hadoop 2.7, Spark 2.3.2
	*
	-- Hourly average warehouse load for one calendar day.
	-- Before running, replace the two date bounds and the warehouse name placeholder.
	SELECT DATE_TRUNC('hour', start_time) AS start_time_trunced_at_hour,
	       HOUR(start_time)               AS start_time_hour,
	       AVG(avg_running)               AS avg_running,
	       AVG(avg_queued_load)           AS avg_queued_load,
	       AVG(avg_queued_provisioning)   AS avg_queued_provisioning,
	       AVG(avg_blocked)               AS avg_blocked
	  FROM snowflake.account_usage.warehouse_load_history
	 -- Half-open range on the raw column selects the same day as
	 -- DATE_TRUNC('DAY', start_time) = '2020-03-17' but keeps the predicate
	 -- free of a function call on start_time, so it can be used for pruning.
	 WHERE start_time >= '2020-03-17'::TIMESTAMP_LTZ
	   AND start_time <  '2020-03-18'::TIMESTAMP_LTZ
	   AND warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'
	 GROUP BY start_time_trunced_at_hour, start_time_hour
	 -- Explicit ordering so the hours come back chronologically on every run.
	 ORDER BY start_time_trunced_at_hour;
	SELECT DATE_TRUNC('hour', start_time) start_time_trunced_at_hour,
	HOUR(start_time) start_time_hour,
	MINUTE(start_time) start_time_min,
	AVG(avg_running) avg_running,
	AVG(avg_queued_load) avg_queued_load,
	AVG(avg_queued_provisioning) avg_queued_provisioning,
	AVG(avg_blocked) avg_blocked
	FROM snowflake.account_usage.warehouse_load_history
	WHERE DATE_TRUNC('day', start_time) = '2020-03-26'
	AND warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'
	WITH wlh
	AS (SELECT DATE_TRUNC('hour', wl.start_time) start_time_trunced_at_hour,
	HOUR(wl.start_time) start_time_hour,
	AVG(avg_running) avg_running,
	AVG(avg_queued_load) avg_queued_load,
	AVG(avg_queued_provisioning) avg_queued_provisioning,
	AVG(avg_blocked) avg_blocked
	FROM snowflake.account_usage.warehouse_load_history wl
	WHERE DATE_TRUNC('day', wl.start_time) = '2020-03-26'
	AND wl.warehouse_name = 'PUT_YOUR_WAREHOUSE_NAME'