prodeezy/Test_Iceberg_File_Scan.txt

## Test_Iceberg_File_Scan.txt
# Test JSON files
bash-3.2$ cat people.json
{"name":"Michael"}
{"name":"Andy", "age":30, "friends": {"Josh": 10, "Biswa": 25} }
{"name":"Justin", "age":19, "friends": {"Kannan": 75, "Sanjay": 100} }

bash-3.2$ cat people2.json
{"name":"Biswa", "age":75, "friends": {"Kannan": 90, "Josh": 10} }
{"name":"Kannan", "age":90, "friends": {"Michael": 10, "Justin": 19} }


spark-shell

import org.apache.spark.sql.types._
import org.apache.iceberg.hadoop.HadoopTables
import org.apache.iceberg.Schema
import org.apache.iceberg.spark.SparkSchemaUtil
import org.apache.iceberg.TableScan
import scala.collection.JavaConverters._
// Expressions is taken from org.apache.iceberg like the other Iceberg imports
// above, so the filter expressions built below are the type that
// org.apache.iceberg.TableScan.filter accepts (the legacy com.netflix.iceberg
// package is a separate set of classes).
import org.apache.iceberg.expressions._


// Spark schema for the sample records: age, name and a friends map
// (friend name -> integer value).
val schema = StructType(Seq(
  StructField("age", IntegerType),
  StructField("name", StringType),
  StructField("friends", MapType(StringType, IntegerType))
))
// The same schema converted to its Iceberg form.
val iceSchema = SparkSchemaUtil.convert(schema)

val tables = new HadoopTables()

// Create the Iceberg table at the "iceberg-people" location.
val iceTable = tables.create(iceSchema, "iceberg-people")
// val iceTable = tables.load("iceberg-people")


// Load each JSON file into its own DataFrame, applying the explicit schema.
val peopleReader = spark.read.schema(schema)
val json = peopleReader.json("people.json")
val json2 = peopleReader.json("people2.json")

// Append both DataFrames, in order, to the same Iceberg table
// (one append per DataFrame).
Seq(json, json2).foreach { df =>
  df.write.format("iceberg").mode("append").save("iceberg-people")
}


// Literal equals test: plan a scan filtered on age = 30 and count the file
// scan tasks that the plan produces.
val ageExp = Expressions.equal("age", 30)
val ageScan = iceTable.newScan().filter(ageExp)

// planFiles hands back a closeable iterable: take its size directly instead
// of bumping a mutable counter, and close it once the count is known.
val ageTasks = ageScan.planFiles()
val fileCount = try { ageTasks.asScala.size } finally { ageTasks.close() }

// EXPECTED: only one file is scanned (age 30 occurs only in people.json); ACTUAL result below is 2
scala> fileCount
res28: Int = 2


// Column isNull test: plan a scan filtered on "age is null" and count the
// file scan tasks that the plan produces.
val isNullExp = Expressions.isNull("age")
val isNullScan = iceTable.newScan().filter(isNullExp)

// SHOULD SCAN ONE FILE ONLY
// planFiles hands back a closeable iterable: take its size directly instead
// of bumping a mutable counter, and close it once the count is known.
val isNullTasks = isNullScan.planFiles()
val fileCount = try { isNullTasks.asScala.size } finally { isNullTasks.close() }
fileCount
	# Test JSON files
	bash-3.2$ cat people.json
	{"name":"Michael"}
	{"name":"Andy", "age":30, "friends": {"Josh": 10, "Biswa": 25} }
	{"name":"Justin", "age":19, "friends": {"Kannan": 75, "Sanjay": 100} }

	bash-3.2$ cat people2.json
	{"name":"Biswa", "age":75, "friends": {"Kannan": 90, "Josh": 10} }
	{"name":"Kannan", "age":90, "friends": {"Michael": 10, "Justin": 19} }



	spark-shell

	import org.apache.spark.sql.types._
	import org.apache.iceberg.hadoop.HadoopTables
	import org.apache.iceberg.Schema
	import org.apache.iceberg.spark.SparkSchemaUtil
	import org.apache.iceberg.TableScan
	import scala.collection.JavaConverters._
	// Expressions is taken from org.apache.iceberg like the other Iceberg imports
	// above, so the filter expressions built below are the type that
	// org.apache.iceberg.TableScan.filter accepts (the legacy com.netflix.iceberg
	// package is a separate set of classes).
	import org.apache.iceberg.expressions._


	// Spark schema for the sample records: age, name and a friends map
	// (friend name -> integer value).
	val schema = StructType(Seq(
	  StructField("age", IntegerType),
	  StructField("name", StringType),
	  StructField("friends", MapType(StringType, IntegerType))
	))
	// The same schema converted to its Iceberg form.
	val iceSchema = SparkSchemaUtil.convert(schema)

	val tables = new HadoopTables()

	// Create the Iceberg table at the "iceberg-people" location.
	val iceTable = tables.create(iceSchema, "iceberg-people")
	// val iceTable = tables.load("iceberg-people")


	// Load each JSON file into its own DataFrame, applying the explicit schema.
	val peopleReader = spark.read.schema(schema)
	val json = peopleReader.json("people.json")
	val json2 = peopleReader.json("people2.json")

	// Append both DataFrames, in order, to the same Iceberg table
	// (one append per DataFrame).
	Seq(json, json2).foreach { df =>
	  df.write.format("iceberg").mode("append").save("iceberg-people")
	}



	// Literal equals test: plan a scan filtered on age = 30 and count the file
	// scan tasks that the plan produces.
	val ageExp = Expressions.equal("age", 30)
	val ageScan = iceTable.newScan().filter(ageExp)

	// planFiles hands back a closeable iterable: take its size directly instead
	// of bumping a mutable counter, and close it once the count is known.
	val ageTasks = ageScan.planFiles()
	val fileCount = try { ageTasks.asScala.size } finally { ageTasks.close() }

	// EXPECTED: only one file is scanned (age 30 occurs only in people.json); ACTUAL result below is 2
	scala> fileCount
	res28: Int = 2



	// Column isNull test: plan a scan filtered on "age is null" and count the
	// file scan tasks that the plan produces.
	val isNullExp = Expressions.isNull("age")
	val isNullScan = iceTable.newScan().filter(isNullExp)

	// SHOULD SCAN ONE FILE ONLY
	// planFiles hands back a closeable iterable: take its size directly instead
	// of bumping a mutable counter, and close it once the count is known.
	val isNullTasks = isNullScan.planFiles()
	val fileCount = try { isNullTasks.asScala.size } finally { isNullTasks.close() }
	fileCount