Claudinei Daitx (claudinei-daitx), Toronto, Ontario
claudinei-daitx / SparkSessionKryo.scala
Last active March 22, 2023 15:51
Example of creating a Spark session with Kryo serialization
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Creates a Spark session that uses Kryo serialization.
object SparkSessionKryo {
  def getSparkSession: SparkSession = {
    val spark = SparkSession
      .builder
      .appName("my spark application name")
      .config(getConfig)
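The preview cuts off before getConfig is defined. A minimal, self-contained sketch of the same idea, assuming getConfig simply builds a SparkConf with Kryo enabled; MyRecord is a hypothetical class standing in for whatever types you register:

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Hypothetical record type, used only to illustrate Kryo class registration.
case class MyRecord(id: Long, name: String)

object SparkSessionKryoSketch {
  // Builds a SparkConf that switches the serializer to Kryo and registers
  // the classes that will be shuffled or cached.
  private def getConfig: SparkConf =
    new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[MyRecord]))

  def getSparkSession: SparkSession =
    SparkSession
      .builder
      .appName("my spark application name")
      .config(getConfig)
      .getOrCreate()
}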
claudinei-daitx / SparkSessionS3.scala
Created December 15, 2017 13:02
Create a Spark session optimized to work with Amazon S3.
import org.apache.spark.sql.SparkSession

object SparkSessionS3 {
  // Creates a Spark session with optimizations for working with Amazon S3.
  def getSparkSession: SparkSession = {
    val spark = SparkSession
      .builder
      .appName("my spark application name")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.hadoop.fs.s3a.access.key", "my access key")
claudinei-daitx / get_penultimate_wednesday.sh
Created January 19, 2018 18:37
Get the penultimate Wednesday of the month
#!/bin/bash
# Finds the penultimate Wednesday in the date range given by the two arguments.
start_date=$1
end_date=$2
current_date=
counter=0
index=0
until [ "$current_date" = "$end_date" ]
do
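The shell preview is truncated before the loop body. As an alternative illustration in Scala (the language of the other gists on this page, not part of the original script), the same result can be computed with java.time: take the last Wednesday of the month and step back one week. The object and method names here are illustrative only:

import java.time.{DayOfWeek, LocalDate, YearMonth}
import java.time.temporal.TemporalAdjusters

object PenultimateWednesday {
  // Second-to-last Wednesday of the given month.
  def of(month: YearMonth): LocalDate =
    month.atEndOfMonth()
      .`with`(TemporalAdjusters.lastInMonth(DayOfWeek.WEDNESDAY))
      .minusWeeks(1)

  def main(args: Array[String]): Unit = {
    // January 2018 has Wednesdays on the 3rd, 10th, 17th, 24th and 31st,
    // so the penultimate one is the 24th.
    println(of(YearMonth.of(2018, 1))) // prints 2018-01-24
  }
}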
claudinei-daitx / schema_spy_redshift_configuration.conf
Last active May 31, 2018 01:41
SchemaSpy Redshift configuration file
schemaspy.t=redshift
# Optional path to an alternative JDBC driver.
# Download the latest Redshift JDBC driver from https://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html#download-jdbc-driver
# In this example, I use the RedshiftJDBC42-1.2.10.1009.jar file.
schemaspy.dp=RedshiftJDBC42-1.2.10.1009.jar
# Database connection properties: host, port, database name, user, password
schemaspy.host=<<server_hostname>>
schemaspy.port=<<server_port>>
schemaspy.db=<<database_name>>
schemaspy.u=<<user_name>>
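The preview ends at the user name; in a full SchemaSpy properties file the password and output directory typically follow. An assumed continuation, with key names taken from SchemaSpy's documented property naming and placeholder values in the same <<...>> style as above:

# Password for the database user and the directory where the generated documentation is written (assumed keys).
schemaspy.p=<<password>>
schemaspy.o=<<output_directory>>

Once saved, SchemaSpy 6.x can be pointed at this file with its -configFile option, e.g. java -jar schemaspy.jar -configFile schema_spy_redshift_configuration.conf (the jar name depends on the release you download).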