velotiotech/.py

## .py
# Register the Iceberg table `demo.db.iceberg_data_2` (format-version 2,
# partitioned by `date`) at its S3 location, unless it already exists.
# Note: the write and the queries further down use `demo.db.iceberg_data`,
# not the `_2` table declared here.
create_table_ddl = """ CREATE TABLE IF NOT EXISTS demo.db.iceberg_data_2(id INT, first_name String,
last_name String, address String, pincocde INT, net_income INT, source_of_income String,
state String, email_id String, description String, population INT, population_1 String,
population_2 String, population_3 String, population_4 String, population_5 String, population_6 String,
population_7 String, date INT)
USING iceberg
TBLPROPERTIES ('format'='parquet', 'format-version' = '2')
PARTITIONED BY (`date`)
location 's3a://abhishek-test-01012023/iceberg_v2/db/iceberg_data_2'"""
spark.sql(create_table_ddl)

# Load the records to be written from S3 with the plain Parquet reader.
# NOTE(review): the path name suggests Delta Lake data; a Parquet read does
# not consult a Delta transaction log -- confirm this is the intended source.
source_path = "s3a://abhishek-test-01012023/delta-lake-sample-data/"
df = spark.read.parquet(source_path)

logging.info("Starting writing the data")

# Order rows by `date` inside each Spark partition, then create (or replace)
# the target table partitioned by that same column.
target_table = "demo.db.iceberg_data"
date_sorted_df = df.sortWithinPartitions("date")
date_sorted_df.writeTo(target_table).partitionedBy("date").createOrReplace()

logging.info("Writing has been finished")

logging.info("Query the data from iceberg using spark SQL")

# Print the table's schema first, then a ten-row sample of its contents.
verification_queries = (
    "describe table demo.db.iceberg_data",
    "Select * from demo.db.iceberg_data limit 10",
)
for query in verification_queries:
    spark.sql(query).show()
	# Create the Iceberg table `demo.db.iceberg_data_2` (format-version 2, partitioned
	# by `date`) at the given S3 location unless it already exists.
	# NOTE(review): the write and queries below use `demo.db.iceberg_data`, not this
	# `_2` table -- confirm that is intended.
	# NOTE(review): the column name `pincocde` looks like a typo for `pincode`; it is
	# part of the table schema, so verify against the source data before renaming.
	spark.sql(""" CREATE TABLE IF NOT EXISTS demo.db.iceberg_data_2(id INT, first_name String,
	last_name String, address String, pincocde INT, net_income INT, source_of_income String,
	state String, email_id String, description String, population INT, population_1 String,
	population_2 String, population_3 String, population_4 String, population_5 String, population_6 String,
	population_7 String, date INT)
	USING iceberg
	TBLPROPERTIES ('format'='parquet', 'format-version' = '2')
	PARTITIONED BY (`date`)
	location 's3a://abhishek-test-01012023/iceberg_v2/db/iceberg_data_2'""")

	# Load the data to be written from S3 into a Spark DataFrame using the Parquet reader.
	# NOTE(review): the path name suggests Delta Lake data; if it is a Delta table, a
	# plain Parquet read does not consult the Delta transaction log -- confirm.

	df = spark.read.parquet("s3a://abhishek-test-01012023/delta-lake-sample-data/")

	logging.info("Starting writing the data")

	# Sort rows by `date` within each Spark partition, then create or replace
	# `demo.db.iceberg_data` partitioned by `date`.
	# Presumably the sort keeps each task's rows grouped by partition value for the
	# partitioned write -- TODO confirm against the Iceberg version in use.
	df.sortWithinPartitions("date").writeTo("demo.db.iceberg_data").partitionedBy("date").createOrReplace()

	logging.info("Writing has been finished")

	logging.info("Query the data from iceberg using spark SQL")

	# Print the written table's schema, then a ten-row sample of its rows.
	spark.sql("describe table demo.db.iceberg_data").show()
	spark.sql("Select * from demo.db.iceberg_data limit 10").show()