Tips
# Firefox (about:config): force the Adwaita GTK theme for web content
widget.content.gtk-theme-override : Adwaita

ClickHouse

Show partitions for database

SELECT DISTINCT 
    table, 
    partition
FROM system.parts
WHERE database = 'db'

Show partitions on cluster

SELECT DISTINCT 
    hostName(), 
    database,
    table, 
    partition
FROM cluster('cluster', 'system', 'parts')

Show partition sizes

SELECT 
    database,
    table,
    partition,
    formatReadableSize(sum(bytes)) AS size
FROM system.parts
GROUP BY 
    database,
    table,
    partition
ORDER BY sum(bytes) DESC
LIMIT 10

Show partition sizes on cluster

SELECT 
    hostName(),
    database,
    table,
    partition,
    formatReadableSize(sum(bytes)) AS size
FROM remote('ch-{1..100}.example.net', 'system.parts')
WHERE database = 'db' AND table = 'table_sharded'
GROUP BY 
    hostName(),
    database,
    table,
    partition
ORDER BY sum(bytes) DESC

Show column sizes

SELECT 
    column, 
    formatReadableSize(size)
FROM 
(
    SELECT 
        column, 
        sum(column_bytes_on_disk) AS size
    FROM system.parts_columns
    WHERE (database = 'db') AND (table = 'table_sharded') AND (partition = '202010')
    GROUP BY column
    ORDER BY size ASC
)

Show max_part_count_for_partition metric

SELECT 
    hostName(),
    *
FROM remote('ch-{1..100}.example.net', 'system.asynchronous_metrics')
WHERE metric = 'MaxPartCountForPartition'
ORDER BY hostName() ASC

In-progress merges with ETA

SELECT
    hostName(),
    database,
    table,
    round(elapsed, 0) AS time,
    round(progress, 4) AS percent,
    formatReadableTimeDelta((elapsed / progress) - elapsed) AS ETA,
    num_parts,
    result_part_name
FROM clusterAllReplicas('mycluster', 'system.merges')
ORDER BY (elapsed / percent) - elapsed ASC

Look up an executing query by substring

SELECT substring(query, position(query, 'interesting query part'), 20)
FROM system.processes
WHERE (query LIKE '%INSERT%') AND (user = 'production')

Drop many custom partitions

SELECT partition
FROM system.parts
WHERE database = {db:String}
  AND table = {table:String}
GROUP BY partition
ORDER BY partition
INTO OUTFILE 'table.tsv'
FORMAT TSVRaw

clickhouse-client --param_db='db' --param_table='t'

FORMAT TSVRaw disables output escaping, which the DROP PARTITION query doesn't need.

Edit the file and leave only the partitions you want to drop.

Drop via bash loop

$ for p in $(cat table.tsv); do echo "$p"; clickhouse-client -h ch-1.example.net --query="ALTER TABLE db.t ON CLUSTER c DROP PARTITION $p"; done

Tail logs

Tail the useful part of the logs. Excludes stack traces, empty lines, and the version string. This will not work with trace-level logs.

tail -F /var/log/clickhouse-server/clickhouse-server.log | grep -vP '\d\d?\. ' | grep -vP '^ *$' | grep -v 'version '

Distinct errors from log

grep '<Error>' clickhouse-server.log | cut -d' ' -f 7- | sort | uniq -c > errors.uniq.log

Distinct query errors from system.text_log

SELECT
    replaceAll(replaceRegexpAll(replaceRegexpOne(message, '\\(from \\[.*\\)', ''), '\n\\d+\\.[^\n].*', ''), '\n', '') AS m,
    count() AS freq,
    any(query_id) as query_id
FROM remote('ch-{1..100}.example.net', 'system.text_log')
WHERE (level = 'Error') AND (logger_name = 'executeQuery') AND (event_date >= '2021-08-31') AND (event_time >= '2021-08-31 14:45:00') AND (message NOT LIKE '%Received from%')
GROUP BY m
ORDER BY freq DESC
Git

# Current branch name
git rev-parse --abbrev-ref HEAD
# First commit in branch
git log master..yourbranch --oneline | tail -1 | cut -d " " -f1
# Last commit in branch
git log master..yourbranch --oneline | head -1 | cut -d " " -f1
# Create and checkout to new branch
git checkout -b branchname
# Create new branch from another branch
git checkout -b branchname anotherbranch
# Checkout single file from another branch
git checkout branch -- path/to/file
# Blame at the lines 100 to 100+11
git blame -L100,+11 -- filename
# Show evolution of lines 100 to 100+11
git log --pretty=short -u -L100,+11:filename
# Diff for directory since specific date
git diff HEAD 'HEAD@{2016-01-30}' path/to/dir
git diff HEAD 'HEAD@{5 days ago}' path/to/dir
# Diff only conflicted changes
git diff --diff-filter=U
# Commits since specific date
git log --name-status --format=oneline --since="2016-01-30" path/to/dir
# Ignore changes in tracked file
git update-index --assume-unchanged path/to/file
# Revert actions above
git update-index --no-assume-unchanged path/to/file
# Commit subjects in dev that are not in master
git log master..dev --no-merges --pretty='%s' | sort -u
Go modules

  • A repository contains one or more Go modules (most often exactly one module, in the repository root).
  • Each module contains one or more Go packages.
  • Each package consists of one or more Go source files that all reside in a single directory.
  • Go source code:
    • declares its own package with a package foo statement.
    • automatically has access to other Go source code in the same package.
    • imports code from another package via an import path supplied in an import statement such as import "github.com/my/repo/pkg1". The import path always starts with the module path of that package, regardless of whether that package is in the same module or a different module (see the sketch below).
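
A minimal sketch of that layout, assuming the hypothetical module path github.com/my/repo from the import example above:

// go.mod at the repository root declares the module path:
//
//   module github.com/my/repo

// File pkg1/greet.go — a source file of package pkg1.
package pkg1

// Greet is exported (capitalized), so other packages can import and call it.
func Greet() string { return "hello from pkg1" }

// File cmd/app/main.go — a second package in the same module.
package main

import (
	"fmt"

	"github.com/my/repo/pkg1" // import path = module path + package directory
)

func main() {
	fmt.Println(pkg1.Greet()) // prints "hello from pkg1"
}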
PostgreSQL

-------------------------------------------------------------------------------
-- Range generation
-- generate_series(start, end, step)
-------------------------------------------------------------------------------
select generate_series(1, 10, 2);
-------------------------------------------------------------------------------
-- Create rows from custom values
-------------------------------------------------------------------------------
select * from (
values (1), (2), (3)
) as tbl(fld);
-------------------------------------------------------------------------------
-- Select whole table rows as a single composite column
-------------------------------------------------------------------------------
select tablename from tablename;
-------------------------------------------------------------------------------
-- Unwrap array to rows
-------------------------------------------------------------------------------
select unnest(array[1,2,3]), 'shared field';
-------------------------------------------------------------------------------
-- most frequently occurring value
-------------------------------------------------------------------------------
select mode() within group (order by customer_id) from orders;
-------------------------------------------------------------------------------
-- Generate a column definition list for a specific table
--
-- May be useful for json_to_record (see the usage sketch below)
-------------------------------------------------------------------------------
WITH typed_columns AS (
    SELECT col.column_name || ' ' || col.data_type AS c
    FROM information_schema.columns AS col
    WHERE col.table_schema = 'public'
      AND col.table_name = 'purchase_receipt'
    ORDER BY col.ordinal_position
),
joined_columns AS (
    SELECT string_agg(c, ', ') AS c FROM typed_columns
)
SELECT 'VALUES(' || c || ')' FROM joined_columns;
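
For reference, a hedged sketch of how json_to_record consumes such a column definition list (the JSON payload and column names here are made up):

SELECT *
FROM json_to_record('{"id": 1, "name": "pen"}'::json)
         AS t(id integer, name text);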
-------------------------------------------------------------------------------
-- Insert into table from a record variable
-------------------------------------------------------------------------------
DO
$$
DECLARE
    usr user_table;  -- "user" is a reserved word, so pick another variable name
BEGIN
    usr.id := nextval('seq_user');
    usr.name := 'name';
    usr.email := 'name@example.org';
    INSERT INTO user_table VALUES (usr.*);
END
$$;
-------------------------------------------------------------------------------
-- Show statistics about seq scan and index scan
-------------------------------------------------------------------------------
SELECT
    relname AS TableName,
    to_char(seq_scan, '999,999,999,999') AS TotalSeqScan,
    to_char(idx_scan, '999,999,999,999') AS TotalIndexScan,
    to_char(n_live_tup, '999,999,999,999') AS TableRows,
    pg_size_pretty(pg_relation_size(relname::regclass)) AS TableSize
FROM pg_stat_all_tables
WHERE schemaname = 'public'
  AND 50 * seq_scan > idx_scan -- more than 2%
  AND n_live_tup > 10000
  AND pg_relation_size(relname::regclass) > 5000000
ORDER BY relname ASC;
-------------------------------------------------------------------------------
-- Show tables where we can add an index to increase performance
-------------------------------------------------------------------------------
SELECT
    relname,
    seq_scan - idx_scan AS too_much_seq,
    CASE
        WHEN seq_scan - coalesce(idx_scan, 0) > 0 THEN 'Missing Index?'
        ELSE 'OK'
    END,
    pg_relation_size(relname::regclass) AS rel_size,
    seq_scan,
    idx_scan
FROM pg_stat_all_tables
WHERE schemaname = 'public'
  AND pg_relation_size(relname::regclass) > 80000
ORDER BY too_much_seq DESC;
-------------------------------------------------------------------------------
-- Show unused indexes
-------------------------------------------------------------------------------
SELECT
    indexrelid::regclass AS index,
    relid::regclass AS table,
    'DROP INDEX ' || indexrelid::regclass || ';' AS drop_statement
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE idx_scan = 0
  AND indisunique IS FALSE;
-------------------------------------------------------------------------------
-- GIS: Select random points inside polygon
-------------------------------------------------------------------------------
SELECT geom AS point
FROM ST_Dump(
    ST_SetSRID(
        ST_GeneratePoints(
            ST_GeomFromText('POLYGON((48.9990234375 55.80784138701898,49.0924072265625 55.73522939875256,49.163818359375 55.71628170645908,49.21600341796874 55.727883425773065,49.2572021484375 55.76266790754882,49.21875 55.81054241340218,49.18373107910156 55.8367712028016,49.09446716308594 55.88224391055777,49.03953552246094 55.88070333425207,49.010009765625 55.84563870340299,48.9990234375 55.80784138701898))'),
            10
        ),
        4326
    )
);
-------------------------------------------------------------------------------
-- Drop all FK in all tables
-------------------------------------------------------------------------------
DO
$$
DECLARE
    t record;
    r record;
BEGIN
    SET search_path = 'deleted';
    FOR t IN (SELECT tablename FROM pg_tables WHERE schemaname = 'deleted') LOOP
        FOR r IN (SELECT constraint_name
                  FROM information_schema.table_constraints
                  WHERE table_schema = 'deleted'
                    AND table_name = t.tablename
                    AND constraint_type = 'FOREIGN KEY') LOOP
            RAISE INFO 'dropping fk % > %', t.tablename, r.constraint_name;
            EXECUTE 'ALTER TABLE "deleted".' || t.tablename
                 || ' DROP CONSTRAINT "' || r.constraint_name || '" CASCADE';
        END LOOP;
    END LOOP;
END;
$$;

Indexes:
  • B-Tree - for most datatypes and queries
  • GIN - for JSONB/hstore/arrays
  • GiST - for full text search and geospatial datatypes
  • SP-GiST - for larger datasets with natural but uneven clustering
  • BRIN - for really large datasets that line up sequentially
  • Hash - for equality operations; generally B-Tree is still what you want here
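
A quick sketch of the corresponding CREATE INDEX syntax, on a hypothetical docs table (the geometry column assumes the PostGIS extension):

CREATE TABLE docs (
    id         bigserial PRIMARY KEY,          -- B-Tree index comes with the PK
    body       jsonb,
    fts        tsvector,
    tag        text,
    geom       geometry(Point, 4326),          -- requires PostGIS
    created_at timestamptz
);

CREATE INDEX docs_body_gin   ON docs USING gin    (body);        -- GIN: JSONB/arrays
CREATE INDEX docs_fts_gist   ON docs USING gist   (fts);         -- GiST: full text search
CREATE INDEX docs_geom_gist  ON docs USING gist   (geom);        -- GiST: geospatial
CREATE INDEX docs_tag_spgist ON docs USING spgist (tag);         -- SP-GiST: uneven clustering
CREATE INDEX docs_ts_brin    ON docs USING brin   (created_at);  -- BRIN: sequential data
CREATE INDEX docs_id_hash    ON docs USING hash   (id);          -- Hash: equality only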

Vim

" Custom color for folded lines
hi Folded ctermbg=7
" Fold rules for JS
setlocal foldmethod=marker foldlevel=1 foldmarker={,}
" Append every match to register a (capital A appends; the n flag leaves the buffer unchanged)
:%s/regex/\=setreg('A', submatch(0))/n