Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import org.apache.spark.sql.types._
// Schema describing the JSON object to be parsed out of a user-agent string
// column with from_json.
//
// Every field is declared nullable. from_json yields null for a key that is
// missing from a record (and a null struct for input it cannot parse), so
// marking the fields non-nullable would misdescribe the data. On Spark builds
// that do not force the from_json schema to nullable this mismatch can surface
// as corrupt output or a NullPointerException (see SPARK-23173 — NOTE(review):
// confirm against the Spark version in use).
val ua_schema = StructType(
  Seq(
    StructField("os_minor", StringType, nullable = true),
    StructField("is_bot", BooleanType, nullable = true),
    StructField("os_major", StringType, nullable = true),
    StructField("device_family", StringType, nullable = true),
    StructField("os_family", StringType, nullable = true),
    StructField("browser_family", StringType, nullable = true),
    StructField("browser_minor", StringType, nullable = true),
    StructField("browser_major", StringType, nullable = true),
    StructField("wmf_app_version", StringType, nullable = true),
    StructField("is_mediawiki", BooleanType, nullable = true)
  )
)
// Source table; its `useragent` column is passed to from_json below, i.e. it
// is expected to carry a JSON document as a string.
val t = spark.table("otto_json_refine_test.popups")

// Parse the JSON string into a struct shaped by ua_schema.
val ua_col = from_json(t.col("useragent"), ua_schema)

// Replace the string column with its parsed struct, keeping the column name.
val t2 = t.withColumn("useragent", ua_col)

// Save the first 100 rows as a Parquet table, overwriting any existing one.
// Trailing dots keep the chain a single expression when pasted into a REPL.
t2.limit(100).write.
  format("parquet").
  mode("overwrite").
  saveAsTable("otto_json_refine_test.popups_ua1")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.