# lakshay-arora/spark-sql1.py

## spark-sql1.py
# importing required libraries
from pyspark.sql import SQLContext
from pyspark.sql import Row

# Load the text file as an RDD of lines, then flag that RDD for caching.
# NOTE: `sc` is not defined in this file; presumably it is the SparkContext
# provided by the PySpark shell / notebook session.
raw_data = sc.textFile('sample_data_final_wh.txt')
raw_data = raw_data.cache()

# Ask the RDD how many partitions it is divided into.
raw_data.getNumPartitions()
## >> 19

# Pull the first two records for a quick look at the data.
raw_data.take(2)
	# importing required libraries
	from pyspark.sql import SQLContext
	from pyspark.sql import Row

	# NOTE(review): this tab-indented section repeats the unindented statements
	# earlier in the file; it looks like a paste/scrape artefact -- confirm
	# whether it is meant to be kept.
	#
	# Read the text file into an RDD and mark it for caching.
	# `sc` is not defined in this file; presumably the SparkContext supplied by
	# the PySpark shell / notebook -- verify against the runtime environment.
	raw_data = sc.textFile('sample_data_final_wh.txt').cache()

	# Get the number of partitions of the RDD.
	raw_data.getNumPartitions()
	## >> 19  (output recorded by the original author)

	# View the first 2 rows of the RDD.
	raw_data.take(2)