# importing required libraries
from pyspark.sql import SQLContext
from pyspark.sql import Row
# Load the text file through the SparkContext `sc`.
# NOTE(review): `sc` is not defined in this file; presumably it is supplied
# by the PySpark shell/notebook session -- confirm before running standalone.
raw_data = sc.textFile('sample_data_final_wh.txt')
# Mark the RDD for caching so later actions can reuse it; cache() hands back
# the same RDD, so `raw_data` keeps referring to the same object.
raw_data.cache()
# get number of partitions
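# >> 19  (output recorded from an earlier run; presumably the partition count -- the call that produced it is not shown here)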
# view top 2 rows
