Skip to content

Instantly share code, notes, and snippets.

@asarama
Created December 6, 2024 21:25
Show Gist options
  • Select an option

  • Save asarama/fcddfa9ec8f96045fb3e47bda59e6a14 to your computer and use it in GitHub Desktop.

Select an option

Save asarama/fcddfa9ec8f96045fb3e47bda59e6a14 to your computer and use it in GitHub Desktop.
pyIceberg and AWS Glue
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema, NestedField
from pyiceberg.types import (
StringType,
LongType,
TimestampType
)
# Connect to the AWS Glue catalog through PyIceberg.
# The credential values below are placeholders and must be replaced
# before the script can talk to AWS.
catalog_properties = {
    'client.access-key-id': '********',  # Update these parameters
    'client.secret-access-key': '********',
    'client.region': 'us-east-1',
    'type': 'glue',  # selects the Glue catalog implementation
}
glue_catalog = load_catalog('default', **catalog_properties)
# Create the Glue database (an Iceberg "namespace") used by this script.
# The *_if_not_exists variant keeps the script re-runnable: a plain
# create_namespace() raises once the database already exists.
glue_catalog.create_namespace_if_not_exists("test_database")
# Describe the table's columns. Every field gets its own numeric field ID.
# Only "id" is explicitly marked as required; "name" and "created_at" rely on
# NestedField's default for `required`.
schema = Schema(
    NestedField(field_id=1, name="id", field_type=LongType(), required=True),
    NestedField(field_id=2, name="name", field_type=StringType()),
    NestedField(field_id=3, name="created_at", field_type=TimestampType()),
)
# S3 location handed to the catalog as the table's root.
# NOTE(review): this is a placeholder bucket. S3 bucket names cannot contain
# underscores, so substitute a real bucket before running.
table_location = "s3://test_bucket/table_1"

# Create the table in the "test_database" namespace, or load it when it is
# already registered. A plain create_table() raises on a second run because
# the table exists; the *_if_not_exists variant keeps the script re-runnable.
table = glue_catalog.create_table_if_not_exists(
    identifier=("test_database", "table_1"),
    schema=schema,
    location=table_location,
)
# Print basic table metadata.
# Table.name is a method that returns the table identifier as a tuple of
# strings; it has to be called, otherwise the f-string renders the bound
# method's repr instead of the name. The parts are joined with dots for
# readability.
table_identifier = ".".join(table.name())
print(f"Table name: {table_identifier}")
print(f"Table location: {table.location()}")
print(f"Table schema: {table.schema()}")
# Query the Iceberg table with DuckDB.
# The table scan is exposed to DuckDB under the name "table_1_name", which is
# the name the SQL statement below selects from.
scan = table.scan()
connection = scan.to_duckdb(table_name="table_1_name")

query = "SELECT * FROM table_1_name"
# NOTE(review): .arrow() presumably yields a PyArrow table rather than a
# DataFrame, despite the `df` name — confirm against the DuckDB version in use.
df = connection.execute(query).arrow()
print(f"Query results: {df}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment