1ambda/df-loading.py

## df-loading.py
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Load the e-commerce event log as CSV. header="true" takes the column names
# from the first row; inferSchema="true" lets Spark derive the column types
# from the data instead of reading everything as strings.
df = (
    spark.read
    .format("csv")
    .options(inferSchema="true", header="true")
    .load("./ecommerce_event.csv")
)

# Row count of the loaded DataFrame: 4264752 (the file is about 450 MiB).
df.count()
# Print each column's name, inferred type and nullability.
df.printSchema()

root
 |-- event_time: string (nullable = true)
 |-- event_type: string (nullable = true)
 |-- product_id: integer (nullable = true)
 |-- category_id: long (nullable = true)
 |-- category_code: string (nullable = true)
 |-- brand: string (nullable = true)
 |-- price: double (nullable = true)
 |-- user_id: integer (nullable = true)
 |-- user_session: string (nullable = true)
	from pyspark.sql.types import *
	from pyspark.sql.functions import *
	from pyspark.sql.window import Window

	# Load the e-commerce event log as CSV. header="true" takes the column names
	# from the first row; inferSchema="true" lets Spark derive the column types
	# from the data instead of reading everything as strings.
	df = (
	    spark.read
	    .format("csv")
	    .options(inferSchema="true", header="true")
	    .load("./ecommerce_event.csv")
	)

	# Row count of the loaded DataFrame: 4264752 (the file is about 450 MiB).
	df.count()
	# Print each column's name, inferred type and nullability.
	df.printSchema()

	root
	 |-- event_time: string (nullable = true)
	 |-- event_type: string (nullable = true)
	 |-- product_id: integer (nullable = true)
	 |-- category_id: long (nullable = true)
	 |-- category_code: string (nullable = true)
	 |-- brand: string (nullable = true)
	 |-- price: double (nullable = true)
	 |-- user_id: integer (nullable = true)
	 |-- user_session: string (nullable = true)