# kovid-r/pyspark_cheatsheet_read_csv.py

## pyspark_cheatsheet_read_csv.py
# Set file_path at the top of the file; if your Spark application interacts
# with other applications, parameterize it instead of hard-coding it.
file_path = '/Users/kovid-r/datasets/moviedb/movies_metadata.csv'

# Name of the data source handed to spark.read.format() in method 2.
# 'csv' is the CSV source built into Spark 2.0+; setups that relied on the
# external spark-csv package used 'com.databricks.spark.csv' here instead.
csv_plugin = 'csv'

# NOTE: `spark` is assumed to be an existing SparkSession (for example the one
# the pyspark shell provides) -- it is not created in this file.

# Method 1 for reading a CSV file: the csv() shortcut. Only header=True is
# passed, so no schema inference is requested here.
df = spark.read.csv(file_path, header=True)

# Method 2 for reading a CSV file: the generic format()/load() reader with the
# header and schema inference both enabled. This rebinds df from method 1;
# keep whichever of the two reads you actually need.
df = (
    spark.read.format(csv_plugin)
    .options(header='true', inferSchema='true')
    .load(file_path)
)