Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python3
"""
PyIceberg Table Maintenance Script
Performs cleanup operations on Iceberg tables without requiring Hive Metastore registration.
"""
import sys
import logging
from datetime import datetime, timedelta
from pyiceberg.catalog.sql import SqlCatalog
import pyiceberg
# Build (or reuse) a Spark session with the Iceberg SQL extensions enabled and
# an Iceberg SparkCatalog of type "hadoop" registered as "hadoop_cat", whose
# warehouse is the local path /tmp/iceberg-warehouse.
# NOTE(review): SparkSession is not imported in the visible part of this file;
# presumably `from pyspark.sql import SparkSession` is required — confirm.
spark = (
    SparkSession.builder
    .config(
        "spark.sql.extensions",
        "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    )
    .config(
        "spark.sql.catalog.hadoop_cat",
        "org.apache.iceberg.spark.SparkCatalog",
    )
    .config("spark.sql.catalog.hadoop_cat.type", "hadoop")
    .config("spark.sql.catalog.hadoop_cat.warehouse", "/tmp/iceberg-warehouse")
    .getOrCreate()
)
spark.sql("""
CALL hadoop_cat.system.expire_snapshots(
table => 'gs://bucket/warehouse/db/table',