Skip to content

Instantly share code, notes, and snippets.

@tom-clickhouse
Created October 17, 2023 19:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tom-clickhouse/808dd03d7bc4cf061bf98b7bbb77c3a6 to your computer and use it in GitHub Desktop.
Save tom-clickhouse/808dd03d7bc4cf061bf98b7bbb77c3a6 to your computer and use it in GitHub Desktop.
INSERT INTO pypi
SELECT
timestamp,
country_code,
url,
project,
(ifNull(file.filename, ''), ifNull(file.project, ''), ifNull(file.version, ''), ifNull(file.type, '')) AS file,
(ifNull(installer.name, ''), ifNull(installer.version, '')) AS installer,
python AS python,
(ifNull(implementation.name, ''), ifNull(implementation.version, '')) AS implementation,
(ifNull(distro.name, ''), ifNull(distro.version, ''), ifNull(distro.id, ''), (ifNull(distro.libc.lib, ''), ifNull(distro.libc.version, ''))) AS distro,
(ifNull(system.name, ''), ifNull(system.release, '')) AS system,
cpu AS cpu,
openssl_version AS openssl_version,
setuptools_version AS setuptools_version,
rustc_version AS rustc_version,
tls_protocol,
tls_cipher
FROM s3Cluster(
'default',
<URL_WITH_GLOB>,
'Parquet',
'timestamp DateTime64(6), country_code LowCardinality(String), url String, project String, `file.filename` String, `file.project` String, `file.version` String, `file.type` String, `installer.name` String, `installer.version` String, python String, `implementation.name` String, `implementation.version` String, `distro.name` String, `distro.version` String, `distro.id` String, `distro.libc.lib` String, `distro.libc.version` String, `system.name` String, `system.release` String, cpu String, openssl_version String, setuptools_version String, rustc_version String,tls_protocol String, tls_cipher String')
SETTINGS
input_format_null_as_default = 1,
input_format_parquet_import_nested = 1,
parallel_distributed_insert_select = 2,
min_insert_block_size_bytes = 0, -- disable byte threshold
min_insert_block_size_rows = <ROWS_PER_BLOCK>,
max_insert_threads = <INSERT_THREADS>;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment