Skip to content

Instantly share code, notes, and snippets.

@prodeezy
Last active April 22, 2019 18:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prodeezy/072dfbc69774652640e36b9ad5f17c68 to your computer and use it in GitHub Desktop.
Save prodeezy/072dfbc69774652640e36b9ad5f17c68 to your computer and use it in GitHub Desktop.
Test for Complex Predicate over Iceberg not returning rows
root@61b7c92f78a4:/usr/local/spark/test# cat people.json
{"name":"Michael"}
{"name":"Andy", "age":30, "friends": {"Josh": 10, "Biswa": 25} }
{"name":"Justin", "age":19, "friends": {"Kannan": 75, "Sanjay": 100} }
$SPARK_HOME/bin/spark-shell --jars ~/iceberg-runtime-ce457ce.jar
import org.apache.spark.sql.types._ ;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.Schema;
import org.apache.iceberg.spark.SparkSchemaUtil
// Repro script for spark-shell: write a DataFrame containing a map column to an
// Iceberg table, read it back, and compare a scalar filter with a map-key filter.
// Multi-line expressions are kept open with braces, parentheses or trailing dots
// so they can be pasted into the REPL without :paste mode.

// Explicit schema: two scalar columns plus a string -> int map column.
val schema = {
  val friendsType = MapType(StringType, IntegerType)
  new StructType().add("age", IntegerType).add("name", StringType).add("friends", friendsType)
}

// Load the sample records with that schema and inspect what was parsed.
val json = spark.read.schema(schema).json("people.json")
json.show()
json.printSchema()

// Create the Iceberg table from the DataFrame's schema, converted to an Iceberg schema.
val tables = new HadoopTables()
val iceSchema = SparkSchemaUtil.convert(json.schema)
val iceTable = tables.create(iceSchema, "iceberg-people-complex")
iceTable.schema()

// Append the JSON rows to the newly created table.
json.write.
  format("iceberg").
  mode("append").
  save("iceberg-people-complex")

// Read the table back through the Iceberg source and expose it to Spark SQL.
val iceDf = spark.read.format("iceberg").load("iceberg-people-complex")
iceDf.createOrReplaceTempView("people_iceberg_complex")

// Filter on a scalar column, for comparison with the map-key filter below.
spark.sql(
  "select * from people_iceberg_complex where age=30"
).show()

// Filter on a key of the map column - THIS DOES NOT RETURN ROWS,
// although Andy's record in people.json has friends.Josh = 10.
spark.sql(
  "select * from people_iceberg_complex where friends['Josh'] == 10"
).show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment