This cheat sheet assumes the following software versions:
- Spark 2.2, which requires JDK 1.8
- CDH 5.13
- JDK 1.8
| package hr_data | |
| import java.io.IOException | |
| import java.text.SimpleDateFormat | |
| import java.util.Calendar | |
| import com.typesafe.config.ConfigFactory | |
| import org.apache.spark.sql.SparkSession | |
| object jsonParser { |
| /** | |
| * Examples of writing mixed unit/property-based (ScalaCheck) tests. | |
| * | |
| * Includes tables and generators as well as 'traditional' tests. | |
| * | |
| * @see http://www.scalatest.org/user_guide/selecting_a_style | |
| * @see http://www.scalatest.org/user_guide/property_based_testing | |
| */ | |
| import org.scalatest._ |
| import org.scalatest.FunSuite | |
| import org.apache.spark.SparkConf | |
| import org.apache.spark.SparkContext | |
| class MyTestSuite extends FunSuite { | |
| val conf = new SparkConf() | |
| .setAppName("My Spark test") |
| package test.com.idlike.junit.df | |
| import breeze.numerics.abs | |
| import org.apache.spark.rdd.RDD | |
| import org.apache.spark.sql.functions.col | |
| import org.apache.spark.sql.{Column, DataFrame, Row} | |
| /** | |
| * Created by Umberto on 06/02/2017. | |
| */ |
// Classic Spark word count: tokenize each input line on single spaces,
// pair every token with a count of 1, then sum the counts per word
// before writing the (word, count) pairs back to HDFS.
val lines = sc.textFile("hdfs://...")
val wordCounts = lines
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKey(_ + _)
wordCounts.saveAsTextFile("hdfs://...")
| import org.apache.spark._ | |
| import org.apache.spark.SparkConf | |
| import org.apache.spark.sql.hive.HiveContext | |
| import com.databricks.spark.csv | |
| object Solution extends App { | |
| val conf = new SparkConf().setAppName("Problem_Execution") | |
| val sc = new SparkContext(conf) | |
| val hiveContext = new HiveContext(sc) |
| import org.apache.spark.sql.SparkSession | |
| import org.apache.hadoop.hbase.util.Bytes | |
| import org.apache.hadoop.hbase.client.Result | |
| import org.apache.hadoop.hbase.HBaseConfiguration | |
| import org.apache.hadoop.hbase.mapreduce.TableInputFormat | |
| import org.apache.hadoop.hbase.io.ImmutableBytesWritable | |
| object readHbaseTableAsDF extends Serializable { | |
/** One employee row as read from the HBase table.
  *
  * @param empID employee identifier (kept as String — HBase stores raw bytes)
  * @param name  employee name
  * @param city  employee city
  */
final case class EmpRow(empID: String, name: String, city: String)