// Skip to content
//
// Instantly share code, notes, and snippets.

// array_distinct: drop duplicate elements from each array in "array_col2".
val arr_distinct_df = {
  val deduplicated = array_distinct($"array_col2")
  df.withColumn("result", deduplicated)
}
arr_distinct_df.show()

// array_repeat: build an array that holds the "array_col2" value twice per row.
// Truncation is disabled so the nested arrays are printed in full.
val arr_repeat_df = {
  val repeatedTwice = array_repeat($"array_col2", 2)
  df.withColumn("result", repeatedTwice)
}
arr_repeat_df.show(truncate = false)

// array_sort: order the elements of "array_col2" ascending
// (per the Spark docs, null elements are placed at the end).
val arr_sort_df = {
  val ascending = array_sort($"array_col2")
  df.withColumn("result", ascending)
}
arr_sort_df.show()
// array_union: elements present in either "array_col1" or "array_col2", without duplicates.
// The "col1" column is dropped from the displayed result.
val arr_union_df = {
  val unioned = array_union($"array_col1", $"array_col2")
  val withUnion = full_df.withColumn("result", unioned)
  withUnion.drop("col1")
}
arr_union_df.show(truncate = false)

// arrays_overlap: flags rows whose two arrays share at least one non-null element.
val arr_overlap_df = {
  val sharesElement = arrays_overlap($"array_col1", $"array_col2")
  full_df.withColumn("result", sharesElement)
}
arr_overlap_df.show()
// Step 1 - array_remove: strip every element equal to 2 from "array_col2"
// and keep the outcome in a new column, "new_array_col".
val temp_df = {
  val withoutTwos = array_remove($"array_col2", 2)
  full_df.withColumn("new_array_col", withoutTwos)
}

// Step 2 - arrays_zip: pair up "array_col1" with "new_array_col" position by position;
// each element of "result" is a struct holding the N-th value of both inputs.
val arr_zip_df = {
  val zipped = arrays_zip($"array_col1", $"new_array_col")
  temp_df
    .withColumn("result", zipped)
    .select("array_col1", "new_array_col", "result")
}
arr_zip_df.show(truncate = false)

// Print the schema of the zipped column to reveal the array-of-struct layout.
arr_zip_df.select("result").printSchema()
// concat: append "array_col2" to "array_col1" (duplicates are kept, unlike array_union).
val arr_cat_df = {
  val joined = concat($"array_col1", $"array_col2")
  full_df.withColumn("result", joined)
}
arr_cat_df.show(truncate = false)

// element_at: pick one element of "array_col2" by position.
// Array positions are 1-based in Spark, so index 1 is the first element.
val arr_element_at_df = {
  val firstElement = element_at($"array_col2", 1)
  df.withColumn("result", firstElement)
}
arr_element_at_df.show()
// flatten: collapse an array of arrays into a single array.
//
// The nested input is produced inline with array_repeat (two copies of "array_col2"
// per row, stored in "repeat") rather than through a separate `arr_repeat_df` value:
// that name is already defined earlier in this file, and declaring it a second time
// is a duplicate-definition error when the file is compiled as one unit.
val arr_flat_df = df
  .withColumn("repeat", array_repeat($"array_col2", 2)) // nested array: [arr, arr]
  .withColumn("result", flatten($"repeat"))             // flattened:    arr ++ arr
  .select("repeat", "result")                           // one projection is enough
arr_flat_df.show(truncate = false)