Created
December 6, 2024 21:25
-
-
Save asarama/fcddfa9ec8f96045fb3e47bda59e6a14 to your computer and use it in GitHub Desktop.
pyIceberg and AWS Glue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: create an Iceberg table in AWS Glue with PyIceberg and query it
# through DuckDB. Running this script performs real calls against AWS
# (Glue catalog and the S3 location configured below).
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema, NestedField
from pyiceberg.types import (
    StringType,
    LongType,
    TimestampType
)

# Create the catalog connection
glue_catalog = load_catalog(
    'default',
    **{
        'client.access-key-id': '********',  # Update these parameters
        'client.secret-access-key': '********',
        'client.region': 'us-east-1'
    },
    type='glue'
)

# Create a new Glue database
glue_catalog.create_namespace("test_database")

# Setup a table schema: field id, column name, column type.
# Only "id" is declared as required.
schema = Schema(
    NestedField(1, "id", LongType(), required=True),
    NestedField(2, "name", StringType()),
    NestedField(3, "created_at", TimestampType())
)

table_location = "s3://test_bucket/table_1"

# Create the table
table = glue_catalog.create_table(
    identifier=("test_database", "table_1"),
    schema=schema,
    location=table_location
)

# Get table metadata.
# Table.name is a method returning the identifier tuple, so it must be
# called; interpolating the bare attribute prints a bound-method repr.
print(f"Table name: {'.'.join(table.name())}")
print(f"Table location: {table.location()}")
print(f"Table schema: {table.schema()}")

# Run a duckdb query: the scan is registered in a DuckDB connection under
# the given table name, then queried with plain SQL.
connection = table.scan().to_duckdb(table_name="table_1_name")
# NOTE: despite the variable name, .arrow() yields an Arrow result,
# not a pandas DataFrame.
df = connection.execute("SELECT * FROM table_1_name").arrow()
print(f"Query results: {df}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment