Priyansh121096/local_iceberg_table_using_trino.txt

## local_iceberg_table_using_trino.txt
Requirements: Linux, Trino
Topics:
 - Apache Iceberg (https://iceberg.apache.org/)
 - Trino (https://trino.io/)
 - MySQL (https://www.mysql.com/)
 - Hive metastore (https://hive.apache.org/)

# Install and configure mysql
# https://www.digitalocean.com/community/tutorials/how-to-install-mysql-on-ubuntu-20-04


# Create a mysql user for hive metastore and grant it access to "metastore"
# https://gridscale.io/en/community/tutorials/create-a-mysql-user/#:~:text=Creating%20a%20new%20user%20is,any%20database%20or%20table%20yet.


# Download hive-standalone-metastore
# Installation instructions: https://github.com/naushadh/hive-metastore/blob/main/Dockerfile (don't do the postgres part, as we're using mysql instead)


# Configure hive-metastore to use mysql
# https://data-flair.training/blogs/configure-hive-metastore-to-mysql/


# Init schema
$HIVE_HOME/bin/schematool -dbType mysql -initSchema


# Create a warehouse dir (this is where your iceberg table's data/metadata will be stored).
# In prod, you'd use an s3 bucket but for local dev, a local directory is fine.
mkdir $HOME/iceberg_warehouse


# Start metastore
$HIVE_HOME/bin/start-metastore


# Bring up Trino with an iceberg catalog
# Trino building and running locally: https://github.com/trinodb/trino/
# Connector config: https://trino.io/docs/current/connector/iceberg.html#general-configuration


# Connect to trino via cli client
$TRINO_HOME/client/trino-cli/target/trino-cli-*-executable.jar


# Create a schema in the iceberg catalog
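# Note: $HOME is not expanded inside a SQL string; replace it with the absolute path of your home directory.
trino> CREATE SCHEMA iceberg.foo
    -> WITH (location = 'file:$HOME/iceberg_warehouse');
trino> USE iceberg.foo;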


# Create an Iceberg table
trino> CREATE TABLE iceberg.foo.bar (
    ->     c1 INTEGER,
    ->     c2 DATE,
    ->     c3 DOUBLE
    -> )
    -> WITH (
    ->     format = 'PARQUET'
    -> );


# Insert sample data
trino:foo> INSERT INTO bar VALUES (1, Date('2020-07-31'), 5.6);

# The data and metadata directories for your table should've been created in $HOME/iceberg_warehouse
# Have fun :)
	Requirements: Linux, Trino
	Topics:
	- Apache Iceberg (https://iceberg.apache.org/)
	- Trino (https://trino.io/)
	- MySQL (https://www.mysql.com/)
	- Hive metastore (https://hive.apache.org/)

	# Install and configure mysql
	# https://www.digitalocean.com/community/tutorials/how-to-install-mysql-on-ubuntu-20-04


	# Create a mysql user for hive metastore and grant it access to "metastore"
	# https://gridscale.io/en/community/tutorials/create-a-mysql-user/#:~:text=Creating%20a%20new%20user%20is,any%20database%20or%20table%20yet.


	# Download hive-standalone-metastore
	# Installation instructions: https://github.com/naushadh/hive-metastore/blob/main/Dockerfile (don't do the postgres part, as we're using mysql instead)


	# Configure hive-metastore to use mysql
	# https://data-flair.training/blogs/configure-hive-metastore-to-mysql/


	# Init schema
	$HIVE_HOME/bin/schematool -dbType mysql -initSchema


	# Create a warehouse dir (this is where your iceberg table's data/metadata will be stored).
	# In prod, you'd use an s3 bucket but for local dev, a local directory is fine.
	mkdir $HOME/iceberg_warehouse


	# Start metastore
	$HIVE_HOME/bin/start-metastore


	# Bring up Trino with an iceberg catalog
	# Trino building and running locally: https://github.com/trinodb/trino/
	# Connector config: https://trino.io/docs/current/connector/iceberg.html#general-configuration


	# Connect to trino via cli client
	$TRINO_HOME/client/trino-cli/target/trino-cli-*-executable.jar


	# Create a schema in the iceberg catalog
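	# Note: $HOME is not expanded inside a SQL string; replace it with the absolute path of your home directory.
	trino> CREATE SCHEMA iceberg.foo
	    -> WITH (location = 'file:$HOME/iceberg_warehouse');
	trino> USE iceberg.foo;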


	# Create an Iceberg table
	trino> CREATE TABLE iceberg.foo.bar (
	-> c1 INTEGER,
	-> c2 DATE,
	-> c3 DOUBLE
	-> )
	-> WITH (
	-> format = 'PARQUET'
	-> );


	# Insert sample data
	trino:foo> INSERT INTO bar VALUES (1, Date('2020-07-31'), 5.6);

	# The data and metadata directories for your table should've been created in $HOME/iceberg_warehouse
	# Have fun :)