Skip to content

Instantly share code, notes, and snippets.

@tom-clickhouse
Last active December 28, 2023 15:33
Show Gist options
  • Save tom-clickhouse/7596e3586b545506851635694ba4ad06 to your computer and use it in GitHub Desktop.
Save tom-clickhouse/7596e3586b545506851635694ba4ad06 to your computer and use it in GitHub Desktop.
Observe Parts
WITH
'default' AS db_name,
'upclick_metrics' AS table_name,
10 AS last_x_minutes,
4 AS last_m_parts_per_node_per_partition,
T_PART_LOG AS (
SELECT
hostName() AS node_name,
event_time,
part_name,
rows,
size_in_bytes AS bytes_on_disk,
partition_id,
part_type
FROM
clusterAllReplicas(default, system.part_log)
WHERE
database = db_name
AND table = table_name
AND event_time > now() - toIntervalMinute(last_x_minutes)
and event_type = 'NewPart' -- Replicas have event_type 'DownloadPart', therefore we only see the new parts here on the node that got the insert
),
T_PARTS AS (
SELECT
name,
partition AS opt_partition_name,
-- part_storage_type AS opt_part_storage_type,
data_compressed_bytes AS opt_data_compressed_bytes,
data_uncompressed_bytes AS opt_data_uncompressed_bytes
FROM
system.parts
WHERE
database = db_name
AND table = table_name
),
T_BOTH AS (
SELECT
node_name,
event_time,
part_name,
rows,
bytes_on_disk,
opt_data_compressed_bytes,
opt_data_uncompressed_bytes,
partition_id,
opt_partition_name,
part_type
-- ,opt_part_storage_type
FROM T_PART_LOG tpl LEFT OUTER JOIN T_PARTS tp ON tpl.part_name = tp.name
ORDER BY
event_time DESC
),
T1 AS (
SELECT
node_name,
partition_id,
any(opt_partition_name) AS opt_partition_name,
groupArray(last_m_parts_per_node_per_partition)(event_time) AS event_time_,
arrayPushBack(arrayMap(d -> formatReadableTimeDelta(d), arrayPopFront(arrayDifference(arrayMap(t -> -1 * toUnixTimestamp(t) ,event_time_)))), '-') AS diff_,
groupArray(last_m_parts_per_node_per_partition)(rows) AS rows_,
groupArray(last_m_parts_per_node_per_partition)(part_name) AS part_name_,
groupArray(last_m_parts_per_node_per_partition)(bytes_on_disk) AS bytes_on_disk_,
groupArray(last_m_parts_per_node_per_partition)(opt_data_compressed_bytes) AS opt_data_compressed_bytes_,
groupArray(last_m_parts_per_node_per_partition)(opt_data_uncompressed_bytes) AS opt_data_uncompressed_bytes_,
groupArray(last_m_parts_per_node_per_partition)(part_type) AS part_type_
-- ,groupArray(last_m_parts_per_node_per_partition)(opt_part_storage_type) AS opt_part_storage_type_
FROM T_BOTH
GROUP BY node_name, partition_id
),
T2 AS (
SELECT
DENSE_RANK() OVER(ORDER BY node_name) AS host_id,
DENSE_RANK() OVER(ORDER BY partition_id) AS partition_id,
--partition_id,
opt_partition_name,
formatDateTime(event_time_, '%T') AS event_time,
diff AS duration_since_prev_part,
rows,
bytes_on_disk,
opt_data_compressed_bytes,
opt_data_uncompressed_bytes,
part_type
-- ,opt_part_storage_type
FROM
T1 ARRAY JOIN
event_time_ AS event_time_,
diff_ AS diff,
rows_ AS rows,
bytes_on_disk_ AS bytes_on_disk,
opt_data_compressed_bytes_ AS opt_data_compressed_bytes,
opt_data_uncompressed_bytes_ AS opt_data_uncompressed_bytes,
part_type_ AS part_type
-- ,opt_part_storage_type_ AS opt_part_storage_type
ORDER BY
host_id ASC,
partition_id ASC,
event_time_ DESC
)
SELECT
host_id AS n,
replaceOne(opt_partition_name, 'tuple()', '') AS ptn,
event_time AS write,
replaceOne(duration_since_prev_part, 'seconds', 's') AS prev,
rows,
formatReadableSize(bytes_on_disk) AS on_disk
,formatReadableSize(opt_data_compressed_bytes) AS data_compressed
,formatReadableSize(opt_data_uncompressed_bytes) AS data_uncompressed
,part_type AS type
-- ,opt_part_storage_type AS storage
FROM T2
SETTINGS skip_unavailable_shards = 1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment