Last active
June 19, 2024 11:58
-
-
Save Ugbot/c84b5fff7d8fd31d5063e400e9ee7dd2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Step 1: Configure the Flink SQL session before any tables are defined.
-- Run the pipeline in batch execution mode.
SET 'execution.runtime-mode' = 'batch';
-- Enable dynamic table options.
-- NOTE(review): no statement in this script uses an OPTIONS hint, so this
-- setting looks optional here -- confirm before removing it.
SET 'table.dynamic-table-options.enabled' = 'true';
-- Step 2: Define the source table that reads the LAION Parquet data.
-- The table is TEMPORARY and is created before any USE CATALOG statement, so
-- it is registered under whatever catalog and database are current at this
-- point in the session, not under the Paimon catalog created later.
-- NOTE(review): column names and types presumably mirror the Parquet file's
-- schema -- verify against the actual data.
CREATE TEMPORARY TABLE source_laion (
    id         STRING,
    url        STRING,
    text       STRING,
    height     INT,
    width      INT,
    md5hash    STRING,
    punsafe    DOUBLE,
    pwatermark DOUBLE,
    aesthetic  DOUBLE
) WITH (
    'connector' = 'filesystem',
    -- Placeholder location; replace with the real bucket and object path.
    'path' = 's3a://your_bucket_name/path/to/laion.parquet',
    'format' = 'parquet'
);
-- Step 3: Register a Paimon catalog and make it the current catalog.
CREATE CATALOG my_paimon_catalog WITH (
    'type' = 'paimon',
    -- Placeholder warehouse root; replace with the real warehouse location.
    'warehouse' = 'file:///path/to/your/paimon-warehouse'
);

-- From here on, unqualified table names resolve inside my_paimon_catalog.
USE CATALOG my_paimon_catalog;

-- Step 4: Create the database in the Paimon catalog (if missing) and switch to it.
CREATE DATABASE IF NOT EXISTS my_database;
USE my_database;
-- Step 5: Create the target table in the current (Paimon) catalog and database.
-- The columns match source_laion one-to-one, in the same order.
-- No 'connector' or 'path' options are given: the table is created inside a
-- Paimon catalog, so the catalog manages it and derives its storage location
-- from the catalog's 'warehouse' setting. Those two options are meant for
-- external Paimon tables declared outside a Paimon catalog.
CREATE TABLE IF NOT EXISTS laion_table (
    id         STRING,
    url        STRING,
    text       STRING,
    height     INT,
    width      INT,
    md5hash    STRING,
    punsafe    DOUBLE,
    pwatermark DOUBLE,
    aesthetic  DOUBLE
);
-- Step 6: Copy every row from the source table into the Paimon table.
-- source_laion was created as a temporary table before the session switched
-- to my_paimon_catalog.my_database, so an unqualified reference would be
-- looked up in the Paimon catalog and not found. It is therefore referenced
-- by its fully qualified name.
-- NOTE(review): assumes the session started in Flink's default_catalog and
-- default_database -- adjust the qualifier if it started elsewhere.
-- Columns are listed explicitly so the mapping does not depend on column order.
INSERT INTO laion_table (
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
)
SELECT
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
FROM default_catalog.default_database.source_laion;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Step 1: Define the source table that reads the LAION Parquet data.
-- The table is created before any USE CATALOG statement, so it is registered
-- in the catalog and database that are current when the session starts.
-- IF NOT EXISTS keeps the script re-runnable within the same session.
-- NOTE(review): column names and types presumably mirror the Parquet file's
-- schema -- verify against the actual data.
CREATE TABLE IF NOT EXISTS source_laion (
    id         STRING,
    url        STRING,
    text       STRING,
    height     INT,
    width      INT,
    md5hash    STRING,
    punsafe    DOUBLE,
    pwatermark DOUBLE,
    aesthetic  DOUBLE
) WITH (
    'connector' = 'filesystem',
    -- Placeholder location; replace with the real bucket and object path.
    'path' = 's3a://your_bucket_name/path/to/laion.parquet',
    'format' = 'parquet'
);
-- Step 2: Set up the Paimon catalog and database that will hold the target table.
CREATE CATALOG my_paimon_catalog WITH (
    'type' = 'paimon',
    -- Placeholder warehouse root; replace with the real warehouse location.
    'warehouse' = 'file:///path/to/your/paimon-warehouse'
);

-- From here on, unqualified table names resolve inside my_paimon_catalog.
USE CATALOG my_paimon_catalog;

-- Create the database if it is missing, then make it the current database.
CREATE DATABASE IF NOT EXISTS my_database;
USE my_database;
-- Target table in the current (Paimon) catalog and database.
-- The columns match source_laion one-to-one, in the same order.
-- No 'connector' or 'path' options are given: the table is created inside a
-- Paimon catalog, so the catalog manages it and derives its storage location
-- from the catalog's 'warehouse' setting. Those two options are meant for
-- external Paimon tables declared outside a Paimon catalog.
CREATE TABLE IF NOT EXISTS laion_table (
    id         STRING,
    url        STRING,
    text       STRING,
    height     INT,
    width      INT,
    md5hash    STRING,
    punsafe    DOUBLE,
    pwatermark DOUBLE,
    aesthetic  DOUBLE
);
-- Step 3: Copy every row from the source table into the Paimon table.
-- The source is referenced by its fully qualified name because the current
-- catalog and database were switched to the Paimon ones above.
-- Columns are listed explicitly so the mapping does not depend on column order.
INSERT INTO laion_table (
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
)
SELECT
    id,
    url,
    text,
    height,
    width,
    md5hash,
    punsafe,
    pwatermark,
    aesthetic
FROM default_catalog.default_database.source_laion;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment