# kovid-r/pyspark_cheatsheet_init.py

## pyspark_cheatsheet_init.py
import pyspark
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# Create (or reuse) a SparkSession named "moviedb" with Hive support enabled.
# https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html
spark = SparkSession.builder.appName("moviedb").enableHiveSupport().getOrCreate()

# Get the SparkContext, which lets the Spark application reach the cluster
# through a resource manager (usually YARN or Mesos). getOrCreate() returns
# the already-active context instead of starting a second one.
sc = SparkContext.getOrCreate()

# Create a SQLContext to access the SQL query engine built on top of Spark.
# SQLContext's first positional parameter is the SparkContext; the session is
# the second. Passing the session explicitly ties this SQLContext to `spark`
# rather than letting the constructor look one up on its own.
# NOTE: SQLContext is kept for backward compatibility; since Spark 3.0 the
# documented entry point is the SparkSession itself (e.g. spark.sql(...)).
sqlContext = SQLContext(sc, spark)