Skip to content

Instantly share code, notes, and snippets.

View saisgit's full-sized avatar

SaiKumar saisgit

View GitHub Profile
object reverseString extends App {

  /**
   * Reverses a string recursively: reverse of the tail followed by the head.
   * Fixed: the original guard `s.length == 1` made `revs("")` recurse into
   * `"".tail`, which throws; `<= 1` handles both empty and single-char input.
   * NOTE: not tail-recursive, so very long inputs could overflow the stack.
   */
  def revs(s: String): String =
    if (s.length <= 1) s
    else revs(s.tail) + s.head

  val s = "24Tutorials"
  print(revs(s))
}
// Classic Spark word count: split each line on spaces, pair every word with 1,
// and sum the counts per word. `sc` is the SparkContext provided by the
// surrounding application/shell. Input and output paths are placeholders.
val textFile = sc.textFile("hdfs://...")
val counts = textFile
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")
import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.sql.hive.HiveContext
// Fixed typo: package is com.databricks.spark.csv, not "sparck".
import com.databricks.spark.csv

// Entry point that wires up the Spark execution contexts.
// Fixed: the keyword is lowercase `object` ("Object" does not compile),
// and the opening brace was never closed in the original snippet.
object Solution extends App {
  val conf = new SparkConf().setAppName("Problem_Execution")
  val sc = new SparkContext(conf)
  // HiveContext is deprecated in Spark 2.x in favour of
  // SparkSession.builder.enableHiveSupport(); kept for compatibility here.
  val hiveContext = new HiveContext(sc)
}
a|b|c
x|y|z
l|m|n
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
// Utility for reading an HBase table into a Spark DataFrame.
// NOTE(review): the object body continues beyond this view; the opening brace
// is intentionally left unclosed here.
object readHbaseTableAsDF extends Serializable {
// Typed row produced from each HBase Result: row key (empID) plus the
// name and city columns — presumably one column family; TODO confirm schema.
case class EmpRow(empID:String, name:String, city:String)
import org.apache.spark.sql.SparkSession
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{StructType,ArrayType}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.explode_outer
def extractPwdFromJceks(sc, jceksfile, password_alias):
    '''
    Function to Retrieve the password from JCEKS file in PySpark.

    Parameters:
        sc: SparkContext; used to reach the JVM Hadoop configuration via sc._jsc
        jceksfile: JCEKS file in Hadoop path form
                   (e.g. "jceks://hdfs/path/creds.jceks")
        password_alias: alias under which the password was stored in the JCEKS file

    Returns:
        Password string, or None if the alias is not present in the provider.
    '''
    config_jceks = sc._jsc.hadoopConfiguration()
    config_jceks.set("hadoop.security.credential.provider.path", jceksfile)
    # getPassword returns a char-sequence proxy (Java char[]), not a str.
    temp = config_jceks.getPassword(password_alias)
    if temp is None:
        return None
    # Fixed: the original snippet stopped after `password = ""` and therefore
    # always returned None; assemble the characters and return the string.
    password = ""
    for ch in temp:
        password += ch
    return password
def getDFbyRDBMS(spark, user, password, driver, jdbcURL, readQuery):
    '''
    Function to Create Dataframe on RDBMS Table.

    Fixed: the docstring was a bare module-level string above the def;
    it now documents the function itself.

    Parameters:
        spark: SparkSession object
        user: DB username
        password: DB password
        driver: JDBC driver class name
        jdbcURL: JDBC connection URL
        readQuery: table name, or a subquery passed as the JDBC `table`
                   argument (a subquery must be parenthesized and aliased,
                   e.g. "(SELECT ...) t")

    Returns:
        Spark Dataframe
    '''
    prop = {"user": user, "password": password, "driver": driver, "sslConnection": "false"}
    df = spark.read.jdbc(url=jdbcURL, table=readQuery, properties=prop)
    return df
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
// Fixed package: ConnectionFactory and Put live in org.apache.hadoop.hbase.client,
// not org.apache.hadoop.client (which does not exist).
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import java.time.format.DateTimeFormatter
/**
 * Function to write data to HBase using Scala.
 * Parameters: path to hbase-site.xml (hbaseConfPath), HBase table name,
 *             row-key value, column family name, and the data to be stored.
 * Returns: Nothing (Unit)
 */
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.alias.CredentialProviderFactory
/**
 * Function to retrieve a password from a JCEKS file in Spark with Scala.
 * Parameters: JCEKS file in Hadoop path form (jceksfile), password alias
 *             for the JCEKS entry (password_alias).
 * Returns: Password string
 */
def extractPwdFromJceks(jceksfile:String, password_alias:String):String = {
val conf:Configuration = new Configuration()
conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, jceksfile)