Skip to content

Instantly share code, notes, and snippets.

@karpanGit
Last active April 30, 2022 12:06
Show Gist options
  • Save karpanGit/a8c8b1ff8ed08ae6d103557749ca6c0a to your computer and use it in GitHub Desktop.
Save karpanGit/a8c8b1ff8ed08ae6d103557749ca6c0a to your computer and use it in GitHub Desktop.
pyspark, create map from arrays
# experiment with map
# NOTE(review): `spark` and `f` are not defined in this snippet; it presumably
# expects a pyspark shell session with `pyspark.sql.functions` bound to `f`
# -- confirm before running it as a standalone script.

# experiment 1: build a map column from an array of keys and an array of values
df = spark.createDataFrame([[1, 2], [3, 4]], schema=['a', 'b'])
# Keys and values are both derived from df.columns so the two arrays always
# stay aligned (map_from_arrays pairs them up position by position).
res = df.select(
    f.array([f.lit(name) for name in df.columns]).alias('names'),
    f.array([f.col(name) for name in df.columns]).alias('values'),
)
res.printSchema()
# Collapse the two parallel arrays into a single map column
res = res.select(f.map_from_arrays(f.col('names'), f.col('values')).alias('mapped'))
res.printSchema()
# root
#  |-- mapped: map (nullable = false)
#  |    |-- key: string
#  |    |-- value: long (valueContainsNull = true)
res.show()
# +----------------+
# |          mapped|
# +----------------+
# |{a -> 1, b -> 2}|
# |{a -> 3, b -> 4}|
# +----------------+
# experiment 2: build a one-entry map per row with create_map (key from 'a', value from 'b')
# NOTE(review): relies on `spark` and `f` already being in scope (not defined here).
df = spark.createDataFrame([['mplah', 2], ['3', None]], schema=['a', 'b'])
res = df.select(
    f.create_map(f.col('a'), f.col('b')).alias('mapped')
)
res.printSchema()
# root
#  |-- mapped: map (nullable = false)
#  |    |-- key: string
#  |    |-- value: long (valueContainsNull = true)
res.show()
# +------------+
# |      mapped|
# +------------+
# |{mplah -> 2}|
# | {3 -> null}|
# +------------+

# selecting values: five equivalent spellings of "look up key 'mplah' in the map"
for lookup in (
    f.col('mapped')['mplah'],   # item access on a column built by name
    f.col('mapped.mplah'),      # dotted path inside the column name
    res['mapped']['mplah'],     # item access on a column taken from the DataFrame
    res.mapped['mplah'],        # attribute access for the column, item access for the key
    res.mapped.mplah,           # attribute access all the way down
):
    res.select(lookup).show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment