Skip to content

Instantly share code, notes, and snippets.

View saisgit's full-sized avatar

SaiKumar saisgit

View GitHub Profile
object reverseString extends App {

  /**
   * Reverses a string recursively: reverse of the tail followed by the head.
   * Fixed: the original guard `s.length == 1` made `revs("")` recurse into
   * `"".tail`, which throws; `<= 1` handles both empty and single-char input.
   * NOTE: not tail-recursive, so very long inputs could overflow the stack.
   */
  def revs(s: String): String =
    if (s.length <= 1) s
    else revs(s.tail) + s.head

  val s = "24Tutorials"
  print(revs(s))
}
// Classic Spark word count: split each line on spaces, pair every word with 1,
// and sum the counts per word. `sc` is the SparkContext provided by the
// surrounding application/shell. Input and output paths are placeholders.
val textFile = sc.textFile("hdfs://...")
val counts = textFile
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")
import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.sql.hive.HiveContext
// Fixed typo: package is com.databricks.spark.csv, not "sparck".
import com.databricks.spark.csv

// Entry point that wires up the Spark execution contexts.
// Fixed: the keyword is lowercase `object` ("Object" does not compile),
// and the opening brace was never closed in the original snippet.
object Solution extends App {
  val conf = new SparkConf().setAppName("Problem_Execution")
  val sc = new SparkContext(conf)
  // HiveContext is deprecated in Spark 2.x in favour of
  // SparkSession.builder.enableHiveSupport(); kept for compatibility here.
  val hiveContext = new HiveContext(sc)
}
a|b|c
x|y|z
l|m|n
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
// Utility for reading an HBase table into a Spark DataFrame.
// NOTE(review): the object body continues beyond this view; the opening brace
// is intentionally left unclosed here.
object readHbaseTableAsDF extends Serializable {
// Typed row produced from each HBase Result: row key (empID) plus the
// name and city columns — presumably one column family; TODO confirm schema.
case class EmpRow(empID:String, name:String, city:String)
import org.apache.spark.sql.SparkSession
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructType,ArrayType}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.explode_outer
def extractPwdFromJceks(sc, jceksfile, password_alias):
    '''
    Function to Retrieve the password from JCEKS file in PySpark.

    Parameters:
        sc: SparkContext; used to reach the JVM Hadoop configuration via sc._jsc
        jceksfile: JCEKS file in Hadoop path form
                   (e.g. "jceks://hdfs/path/creds.jceks")
        password_alias: alias under which the password was stored in the JCEKS file

    Returns:
        Password string, or None if the alias is not present in the provider.
    '''
    config_jceks = sc._jsc.hadoopConfiguration()
    config_jceks.set("hadoop.security.credential.provider.path", jceksfile)
    # getPassword returns a char-sequence proxy (Java char[]), not a str.
    temp = config_jceks.getPassword(password_alias)
    if temp is None:
        return None
    # Fixed: the original snippet stopped after `password = ""` and therefore
    # always returned None; assemble the characters and return the string.
    password = ""
    for ch in temp:
        password += ch
    return password
def getDFbyRDBMS(spark, user, password, driver, jdbcURL, readQuery):
    '''
    Function to Create Dataframe on RDBMS Table.

    Fixed: the docstring was a bare module-level string above the def;
    it now documents the function itself.

    Parameters:
        spark: SparkSession object
        user: DB username
        password: DB password
        driver: JDBC driver class name
        jdbcURL: JDBC connection URL
        readQuery: table name, or a subquery passed as the JDBC `table`
                   argument (a subquery must be parenthesized and aliased,
                   e.g. "(SELECT ...) t")

    Returns:
        Spark Dataframe
    '''
    prop = {"user": user, "password": password, "driver": driver, "sslConnection": "false"}
    df = spark.read.jdbc(url=jdbcURL, table=readQuery, properties=prop)
    return df
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
// Fixed package: ConnectionFactory and Put live in org.apache.hadoop.hbase.client,
// not org.apache.hadoop.client (which does not exist).
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import java.time.format.DateTimeFormatter
/**
 * Function to write data to HBase using Scala.
 * Parameters: path to hbase-site.xml (hbaseConfPath), HBase table name,
 *             row-key value, column family name, and the data to be stored.
 * Returns: Nothing (Unit)
 */
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.alias.CredentialProviderFactory
/**
 * Function to retrieve a password from a JCEKS file in Spark with Scala.
 * Parameters: JCEKS file in Hadoop path form (jceksfile), password alias
 *             for the JCEKS entry (password_alias).
 * Returns: Password string
 */
def extractPwdFromJceks(jceksfile:String, password_alias:String):String = {
val conf:Configuration = new Configuration()
conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, jceksfile)