Skip to content

Instantly share code, notes, and snippets.

@Ugbot
Last active June 19, 2024 11:58
Show Gist options
  • Save Ugbot/c84b5fff7d8fd31d5063e400e9ee7dd2 to your computer and use it in GitHub Desktop.
Save Ugbot/c84b5fff7d8fd31d5063e400e9ee7dd2 to your computer and use it in GitHub Desktop.
-- Step 1: Configure the Flink SQL session.
-- Dynamic table options are switched on for the session.
SET 'table.dynamic-table-options.enabled' = 'true';
-- The pipeline is executed in batch runtime mode.
SET 'execution.runtime-mode' = 'batch';
-- Step 2: Declare the LAION Parquet data as a temporary source table.
-- The table is read through the filesystem connector; the S3 path below is a
-- placeholder and must be replaced with the real bucket and object location.
CREATE TEMPORARY TABLE source_laion (
    id          STRING,
    url         STRING,
    text        STRING,
    height      INT,
    width       INT,
    md5hash     STRING,
    punsafe     DOUBLE,
    pwatermark  DOUBLE,
    aesthetic   DOUBLE
)
WITH (
    'connector' = 'filesystem',
    'path'      = 's3a://your_bucket_name/path/to/laion.parquet',
    'format'    = 'parquet'
);
-- Step 3: Register a Paimon catalog and make it the current catalog.
-- The warehouse location is a placeholder for the real warehouse directory.
CREATE CATALOG my_paimon_catalog
WITH (
    'type'      = 'paimon',
    'warehouse' = 'file:///path/to/your/paimon-warehouse'
);

USE CATALOG my_paimon_catalog;

-- Step 4: Make sure the target database exists in that catalog, then switch to it.
CREATE DATABASE IF NOT EXISTS my_database;

USE my_database;
-- Step 5: Create the target Paimon table (same column layout as the Parquet source).
-- No WITH clause is given on purpose: the table is created inside the Paimon
-- catalog (my_paimon_catalog), which manages it and derives its storage location
-- from the catalog's 'warehouse' setting. Explicit 'connector' and 'path' options
-- are therefore unnecessary, and a hand-written 'path' can disagree with the
-- location the catalog actually uses.
-- NOTE(review): no PRIMARY KEY is declared, so this is an append-style table;
-- confirm that duplicates on `id` are acceptable for this dataset.
CREATE TABLE IF NOT EXISTS laion_table (
    id          STRING,
    url         STRING,
    text        STRING,
    height      INT,
    width       INT,
    md5hash     STRING,
    punsafe     DOUBLE,
    pwatermark  DOUBLE,
    aesthetic   DOUBLE
);
-- Step 6: Copy every row from the Parquet source into the Paimon table.
-- source_laion is a TEMPORARY table that was created before the
-- USE CATALOG / USE statements, so it is registered under the catalog and
-- database that were current at that point. An unqualified reference here would
-- be resolved against my_paimon_catalog.my_database and fail with
-- "table not found", so the source is addressed by its full name.
-- NOTE(review): assumes the session started in Flink's built-in
-- default_catalog.default_database; adjust the qualifier if the session is
-- configured with different initial catalog/database names.
-- Columns are listed explicitly so a schema change on either side fails loudly
-- instead of silently shifting values between columns.
INSERT INTO laion_table (
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
)
SELECT
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
FROM default_catalog.default_database.source_laion;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment