# 1ambda/df-loading.py

## df-loading.py
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql.window import Window

def _load_airbnb_csv(path):
    """Read one Airbnb CSV file into a DataFrame marked for caching.

    Both input files are read with the same reader options, so they are
    kept in one place:

    * ``header=True``      -- the first line supplies the column names.
    * ``inferSchema=True`` -- column types are inferred from the data.
    * ``quote='"'`` / ``escape='"'`` -- a double quote inside a quoted
      field is written as two double quotes.
    * ``multiline=True``   -- a quoted field may span several lines.

    NOTE(review): ``spark`` is not defined in this file; presumably it is
    the SparkSession injected by the pyspark shell / notebook -- confirm.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame after ``.cache()`` has been called on it.
    """
    return spark.read.load(
        path,
        format="csv", inferSchema=True, header=True,
        quote='"', escape='"', sep=',', multiline=True,
    ).cache()


# Load each data set exactly once. The previous version repeated this whole
# script a second time with tab indentation, which is an IndentationError
# and would only have re-read the same files.
dfCalendar = _load_airbnb_csv("./airbnb_calendar.csv")
dfListing = _load_airbnb_csv("./airbnb_listings.csv")

# Print the schema and the first rows of the calendar data.
dfCalendar.printSchema()
dfCalendar.show()

# The listing data is only previewed through a subset of its columns.
dfListing.printSchema()
dfListing\
   .select("id", "listing_url", "name", "description", "property_type", "city", "review_scores_rating", "price")\
   .show()