Skip to content

Instantly share code, notes, and snippets.

@den-crane
Created March 14, 2019 15:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save den-crane/f601571a970a0ae2833b294cf3ce46ab to your computer and use it in GitHub Desktop.
Save den-crane/f601571a970a0ae2833b294cf3ce46ab to your computer and use it in GitHub Desktop.
ClickHouse SAMPLE 0.33 OFFSET 0.33 example
-- Demo table for the SAMPLE clause: rows are sorted by intHash32(tx) and the
-- same expression is declared as the sampling key.
CREATE TABLE fff
(
    tx Int64
)
ENGINE = MergeTree()
ORDER BY (intHash32(tx))
SAMPLE BY intHash32(tx);
-- Load the ten values produced by numbers(10) into the demo table.
INSERT INTO fff (tx)
SELECT number
FROM numbers(10);
-- First slice: a 0.33 fraction of the sampling-key range, starting at offset 0.
SELECT groupArray(tx)
FROM fff
SAMPLE 0.33 OFFSET 0;
[9,6,3,1]
-- Second slice: a 0.33 fraction of the sampling-key range, starting at offset 0.33.
SELECT groupArray(tx)
FROM fff
SAMPLE 0.33 OFFSET 0.33;
[8,2,5]
-- Third slice: a 0.33 fraction of the sampling-key range, starting at offset 0.66.
-- NOTE(review): 0.66 + 0.33 = 0.99, so the slices at offsets 0, 0.33 and 0.66
-- appear to leave the top ~1% of the range uncovered; exact fractions
-- (e.g. SAMPLE 1/3 OFFSET 2/3) should avoid the gap -- confirm before relying on it.
SELECT groupArray(tx)
FROM fff
SAMPLE 0.33 OFFSET 0.66;
[7,4,0]
--------
-- List each value next to its intHash32 hash, ordered by the hash, to show
-- where every value falls within the sampling-key range.
SELECT
    number AS n,
    intHash32(n) AS x
FROM numbers(10)
ORDER BY x;
┌─n─┬──────────x─┐
│ 9 │ 1241149650 │
│ 6 │ 1295823179 │
│ 3 │ 1298551497 │
│ 1 │ 1343103100 │
│ 8 │ 1618865725 │
│ 2 │ 1996614413 │
│ 5 │ 2641603337 │
│ 7 │ 3844986530 │
│ 4 │ 3902320246 │
│ 0 │ 4249604106 │
└───┴────────────┘
-- Insert the same ten values a second time, so every value is now stored twice.
INSERT INTO fff (tx)
SELECT number
FROM numbers(10);
-- Re-run the first slice after the second insert: the same values are
-- selected, once per inserted batch.
SELECT groupArray(tx)
FROM fff
SAMPLE 0.33 OFFSET 0;
[9,6,3,1,9,6,3,1]
-- Force a merge of the table's data before sampling again.
OPTIMIZE TABLE fff FINAL;
-- Re-run the first slice after the merge: the same values are selected, with
-- duplicates now adjacent in the result.
SELECT groupArray(tx)
FROM fff
SAMPLE 0.33 OFFSET 0;
[9,9,6,6,3,3,1,1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment