t04glovern/lets-try-pyiceberg.py

## lets-try-pyiceberg.py
#!/usr/bin/env python3
#
# -- Run the other script to create the Iceberg table
#
# pip install boto3
# curl https://gist.githubusercontent.com/t04glovern/04f6f2934353eb1d0fffd487e9b9b6a3/raw \
#    > lets-try-iceberg.py \
#    && chmod +x lets-try-iceberg.py
# ./lets-try-iceberg.py --table lets_try_iceberg
#
# -- Run this script to connect to the Iceberg table using PyIceberg
#
# pip install pyarrow pyiceberg
# ./lets-try-pyiceberg.py --bucket <bucket-name>

import argparse
import logging
import os

from pyiceberg.catalog import load_catalog


# AWS region handed to the Glue catalog configuration in main().
aws_region: str = "us-west-2"
# pyarrow needs the default region exported in order to work with S3.
os.environ['AWS_DEFAULT_REGION'] = aws_region

# Emit INFO-level (and more severe) records through the root logger.
logging.basicConfig(level=logging.INFO)


def main(
    bucket_name: str,
    namespace: str = 'default',
    table_name: str = 'lets_try_iceberg',
) -> None:
    """Connect to the Glue catalog and log a small sample of an Iceberg table.

    Logs the catalog's namespaces, the tables found in ``namespace`` and
    the result of a scan (at most 10 rows) of ``namespace.table_name``,
    converted with ``to_arrow()``.

    Args:
        bucket_name: S3 bucket name; passed to the catalog as the
            ``s3://<bucket_name>`` ``uri`` property.
        namespace: Namespace to list tables from and to load the table
            from. Defaults to ``'default'``.
        table_name: Name of the table to load and scan. Defaults to
            ``'lets_try_iceberg'``.
    """
    # Configure the catalog (region comes from the module-level aws_region)
    catalog = load_catalog(
        'default',
        **{
            'type': 'glue',
            'uri': f's3://{bucket_name}',
            'region_name': aws_region,
        },
    )

    # List the namespaces
    namespaces = catalog.list_namespaces()
    logging.info(f'Namespaces: {namespaces}')

    # List the tables in the requested namespace
    tables = catalog.list_tables(namespace)
    logging.info(f'Tables: {tables}')

    # Load the table, identified as a (namespace, table name) tuple
    table = catalog.load_table((namespace, table_name))

    # Scan the table: only the selected columns, capped at 10 rows
    scan = table.scan(
        selected_fields=('id', 'timestamp', 'speed', 'temperature', 'location'),
        limit=10,
    ).to_arrow()

    # Print the results
    logging.info(f'Scan: {scan}')


if __name__ == '__main__':
    # Command-line interface: a single mandatory --bucket option.
    parser = argparse.ArgumentParser(description="PyIceberg sample script")
    parser.add_argument(
        "--bucket",
        required=True,
        help="The S3 bucket name where the existing Iceberg table is stored.",
        type=str,
    )

    # Parse the command line and hand the bucket name over to main().
    args = parser.parse_args()
    bucket_name = args.bucket
    main(bucket_name)
	#!/usr/bin/env python3
	#
	# -- Run the other script to create the Iceberg table
	#
	# pip install boto3
	# curl https://gist.githubusercontent.com/t04glovern/04f6f2934353eb1d0fffd487e9b9b6a3/raw \
	# > lets-try-iceberg.py \
	# && chmod +x lets-try-iceberg.py
	# ./lets-try-iceberg.py --table lets_try_iceberg
	#
	# -- Run this script to connect to the Iceberg table using PyIceberg
	#
	# pip install pyarrow pyiceberg
	# ./lets-try-pyiceberg.py --bucket <bucket-name>

	import argparse
	import logging
	import os

	from pyiceberg.catalog import load_catalog


	# AWS region handed to the Glue catalog configuration in main().
	aws_region: str = "us-west-2"
	# pyarrow needs the default region exported in order to work with S3.
	os.environ['AWS_DEFAULT_REGION'] = aws_region

	# Emit INFO-level (and more severe) records through the root logger.
	logging.basicConfig(level=logging.INFO)


	def main(
	    bucket_name: str,
	    namespace: str = 'default',
	    table_name: str = 'lets_try_iceberg',
	) -> None:
	    """Connect to the Glue catalog and log a small sample of an Iceberg table.

	    Logs the catalog's namespaces, the tables found in ``namespace`` and
	    the result of a scan (at most 10 rows) of ``namespace.table_name``,
	    converted with ``to_arrow()``.

	    Args:
	        bucket_name: S3 bucket name; passed to the catalog as the
	            ``s3://<bucket_name>`` ``uri`` property.
	        namespace: Namespace to list tables from and to load the table
	            from. Defaults to ``'default'``.
	        table_name: Name of the table to load and scan. Defaults to
	            ``'lets_try_iceberg'``.
	    """
	    # Configure the catalog (region comes from the module-level aws_region)
	    catalog = load_catalog(
	        'default',
	        **{
	            'type': 'glue',
	            'uri': f's3://{bucket_name}',
	            'region_name': aws_region,
	        },
	    )

	    # List the namespaces
	    namespaces = catalog.list_namespaces()
	    logging.info(f'Namespaces: {namespaces}')

	    # List the tables in the requested namespace
	    tables = catalog.list_tables(namespace)
	    logging.info(f'Tables: {tables}')

	    # Load the table, identified as a (namespace, table name) tuple
	    table = catalog.load_table((namespace, table_name))

	    # Scan the table: only the selected columns, capped at 10 rows
	    scan = table.scan(
	        selected_fields=('id', 'timestamp', 'speed', 'temperature', 'location'),
	        limit=10,
	    ).to_arrow()

	    # Print the results
	    logging.info(f'Scan: {scan}')


	if __name__ == '__main__':
	    # Command-line interface: a single mandatory --bucket option.
	    parser = argparse.ArgumentParser(description="PyIceberg sample script")
	    parser.add_argument(
	        "--bucket",
	        type=str,
	        help="The S3 bucket name where the existing Iceberg table is stored.",
	        required=True,
	    )

	    # Parse the command line and hand the bucket name over to main().
	    args = parser.parse_args()
	    bucket_name = args.bucket

	    main(bucket_name)