Skip to content

Instantly share code, notes, and snippets.

Last active April 8, 2021 00:07
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
// Implicits provide many shortcuts, including conversion from Row into a specific type
import spark.implicits._
// Case class to use as type for each Row
/** Raw schema for one vehicle-stop record, used to type each DataFrame Row
  * via `.as[VehicleStopRaw]`. All fields are kept as String because this is
  * the untyped ingestion shape; parsing/typing happens downstream.
  *
  * Marked `final`: case classes should not be extended (equals/hashCode
  * would break under inheritance).
  */
final case class VehicleStopRaw(
  stop_id: String, stop_cause: String, service_area: String, subject_race: String,
  subject_sex: String, subject_age: String, timestamp: String, stop_date: String,
  stop_time: String, sd_resident: String, arrested: String, searched: String,
  obtained_consent: String, contraband_found: String, property_seized: String)
// NOTE(review): this snippet is incomplete as captured — the right-hand side
// of `val cvDF =` (presumably a `spark.read...` expression loading the vehicle
// stops CSV) was lost when the gist was scraped. As written this does not
// compile; restore the reader expression before `.as[VehicleStopRaw]`.
// Requires `import spark.implicits._` (above) for the VehicleStopRaw Encoder.
val cvDF =
.as[VehicleStopRaw] // convert each row to type VehicleStopRaw
// NOTE(review): also truncated — the expression assigned to `r` (likely a
// `cvDF.show(...)` or similar "print data" call) is missing.
val r = // print data
# Demo: two ways to build a Spark DataFrame from in-memory rows.
# Each row is (name, desc, value).
# NOTE(review): the original capture was missing the closing bracket on this
# list literal — restored here so the script is syntactically valid.
sample_data = [
    ["TestRecord1", "first entry", 1],
    ["TestRecord2", "second entry", 2],
    ["TestRecord3", "third entry", 3],
]

# 1) Column names only: Spark infers each column's type from the data.
#    (assumes `spark` is an active SparkSession created elsewhere — confirm)
column_names = ['name', 'desc', 'value']
df = spark.createDataFrame(sample_data, column_names)  # print data

# 2) Explicit StructType schema: column types are pinned, not inferred.
from pyspark.sql.types import StructType
schema = StructType().add('name', 'string').add('desc', 'string').add('value', 'integer')
df2 = spark.createDataFrame(sample_data, schema)  # print data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment