
@rajkiran485
rajkiran485 / jsonParser.scala
Created September 18, 2020 06:24 — forked from chaitanyapolipalli/jsonParser.scala
Spark Program to read Nested JSON
package hr_data
import java.io.IOException
import java.text.SimpleDateFormat
import java.util.Calendar
import com.typesafe.config.ConfigFactory
import org.apache.spark.sql.SparkSession
object jsonParser {
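
The preview above cuts off at the object declaration. As a rough sketch of the same idea — reading nested JSON with Spark SQL — assuming a hypothetical file path and field names (not taken from the gist):

```scala
import org.apache.spark.sql.SparkSession

object NestedJsonExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("NestedJsonExample")
      .master("local[*]")
      .getOrCreate()

    // multiLine is needed when a single JSON record spans several lines
    val df = spark.read.option("multiLine", "true").json("employees.json")

    // Nested fields are addressed with dot notation
    df.select("name", "address.city").show()

    spark.stop()
  }
}
```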

rajkiran485 / PropertyTests.scala
Created September 16, 2020 14:07 — forked from davidallsopp/PropertyTests.scala
Examples of writing mixed unit/property-based (ScalaTest with ScalaCheck) tests. Includes tables and generators as well as 'traditional' tests.
/**
* Examples of writing mixed unit/property-based (ScalaCheck) tests.
*
* Includes tables and generators as well as 'traditional' tests.
*
* @see http://www.scalatest.org/user_guide/selecting_a_style
* @see http://www.scalatest.org/user_guide/property_based_testing
*/
import org.scalatest._
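
The import is where this preview stops. A minimal mixed unit/property-based test in the spirit of the gist, with illustrative class and property names, might look like this (assuming ScalaTest with its ScalaCheck integration on the classpath):

```scala
import org.scalatest.FunSuite
import org.scalatest.prop.GeneratorDrivenPropertyChecks

class ReverseSpec extends FunSuite with GeneratorDrivenPropertyChecks {
  // 'traditional' unit test: one concrete example
  test("reverse of a two-element list") {
    assert(List(1, 2).reverse == List(2, 1))
  }

  // property-based test: reversing twice yields the original list,
  // checked against many generated lists
  test("reverse is an involution") {
    forAll { (xs: List[Int]) =>
      assert(xs.reverse.reverse == xs)
    }
  }
}
```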
rajkiran485 / MyTestSuite.scala
Created November 27, 2019 19:10 — forked from melrief/MyTestSuite.scala
Problem of Spark with FunSuite and defaultParallelism
import org.scalatest.FunSuite
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
class MyTestSuite extends FunSuite {
val conf = new SparkConf()
.setAppName("My Spark test")
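
A plausible continuation of the truncated suite above: the defaultParallelism problem the gist names arises because a `SparkConf` without a master cannot resolve a parallelism level. The `"local[2]"` master and the test body here are assumptions for illustration:

```scala
import org.scalatest.FunSuite
import org.apache.spark.{SparkConf, SparkContext}

class MyTestSuite extends FunSuite {
  val conf = new SparkConf()
    .setAppName("My Spark test")
    .setMaster("local[2]") // without a master, defaultParallelism cannot be resolved

  test("defaultParallelism matches the local thread count") {
    val sc = new SparkContext(conf)
    try {
      assert(sc.defaultParallelism == 2)
    } finally {
      sc.stop() // always stop the context so later suites can create their own
    }
  }
}
```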
rajkiran485 / BIGDATA_Spark_Python.md
Created August 13, 2019 17:10
Big Data Spark Cheat sheet focusing on using Python API

Big Data Spark Cheat Sheet

This cheat sheet assumes the following software versions:

  • Spark 2.2, which requires JDK 1.8
  • CDH 5.13
  • JDK 1.8
rajkiran485 / BIGDATA_HIVE_Syntax.md
Created August 13, 2019 16:45 — forked from kzhangkzhang/BIGDATA_HIVE_Syntax.md
Hive Syntax Cheat Sheet

Hive Syntax Cheat Sheet

General rules

  • interchangeable constructs
  • Hive is not case-sensitive
  • a semicolon terminates each statement

Hive Data Types

Metadata

#Selecting a database
USE database;

#Listing databases
SHOW DATABASES;
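
The same metadata commands can be issued from Spark with Hive support enabled. A sketch assuming a Hive-enabled SparkSession; the database name is illustrative:

```scala
import org.apache.spark.sql.SparkSession

object HiveMetadataExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("HiveMetadataExample")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("USE default")           // selecting a database
    spark.sql("SHOW DATABASES").show() // listing databases

    spark.stop()
  }
}
```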
rajkiran485 / DataFrameSuite.scala
Created March 27, 2019 09:50 — forked from umbertogriffo/DataFrameSuite.scala
DataFrameSuite allows you to check whether two DataFrames are equal, using the method assertDataFrameEquals. When a DataFrame contains doubles or Spark MLlib Vectors, you can assert that two DataFrames are approximately equal using the method assertDataFrameApproximateEquals.
package test.com.idlike.junit.df
import breeze.numerics.abs
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{Column, DataFrame, Row}
/**
* Created by Umberto on 06/02/2017.
*/
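
The gist preview stops after the imports. As a minimal hand-rolled sketch of the same idea — the real gist's assertDataFrameEquals is richer — two DataFrames can be compared by schema and by row content independent of order:

```scala
import org.apache.spark.sql.DataFrame

object DataFrameCompare {
  // Two DataFrames are considered equal when they have the same schema
  // and the same multiset of rows, regardless of row order.
  def dataFramesEqual(a: DataFrame, b: DataFrame): Boolean = {
    a.schema == b.schema &&
      a.collect().sortBy(_.toString).sameElements(b.collect().sortBy(_.toString))
  }
}
```

Note that `collect()` pulls both DataFrames to the driver, so this sketch is only suitable for the small fixtures typical of unit tests.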
// Classic Spark word count over a text file on HDFS
val textFile = sc.textFile("hdfs://...")
val counts = textFile.flatMap(line => line.split(" "))
  .map(word => (word, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")
import org.apache.spark._
import org.apache.spark.SparkConf
import org.apache.spark.sql.hive.HiveContext
import com.databricks.spark.csv._
object Solution extends App {
val conf = new SparkConf().setAppName("Problem_Execution")
val sc = new SparkContext(conf)
val hiveContext = new HiveContext(sc)
import org.apache.spark.sql.SparkSession
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
object readHbaseTableAsDF extends Serializable {
case class EmpRow(empID:String, name:String, city:String)
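
The preview ends at the case class. A hedged sketch of how readHbaseTableAsDF might continue, using the imports shown above: scan an HBase table with TableInputFormat and map each Result into an EmpRow. The table, column family, and qualifier names here are assumptions, not taken from the gist:

```scala
def readTable(spark: SparkSession): Unit = {
  val conf = HBaseConfiguration.create()
  conf.set(TableInputFormat.INPUT_TABLE, "employees") // assumed table name

  // Each record is a (row key, Result) pair from the HBase scan
  val rdd = spark.sparkContext.newAPIHadoopRDD(
    conf,
    classOf[TableInputFormat],
    classOf[ImmutableBytesWritable],
    classOf[Result])

  import spark.implicits._
  val df = rdd.map { case (key, result) =>
    EmpRow(
      Bytes.toString(key.get()),
      Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("name"))),
      Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("city"))))
  }.toDF()

  df.show()
}
```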