Skip to content

Instantly share code, notes, and snippets.

@den-crane
Last active September 8, 2020 07:28
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save den-crane/fcc95468f37e40f9ff2a22cf3e250c0f to your computer and use it in GitHub Desktop.
Save den-crane/fcc95468f37e40f9ff2a22cf3e250c0f to your computer and use it in GitHub Desktop.
CH argMin vs Limit
create table test (p Int64, t Int64, x String, y Int64) engine=MergeTree order by (p,t)
insert into test select intDiv(number,10000), rand() r, toString(r), number from numbers(10000000);
select count(), min(x) from (
select * from test order by p, t limit 1 by p
)
┌─count()─┬─min(x)─┐
│ 1000 │ 100420 │
└─────────┴────────┘
1 rows in set. Elapsed: 2.269 sec. Processed 10.00 million rows, 347.41 MB (4.41 million rows/s., 153.12 MB/s.)
select count(), min(x) from (
select p, V.1 t, V.2 x, V.3 y from (
select p, argMin((t,x,y),t) V from test
group by p)
)
┌─count()─┬─min(x)─┐
│ 1000 │ 100420 │
└─────────┴────────┘
1 rows in set. Elapsed: 0.136 sec. Processed 10.00 million rows, 427.41 MB (73.63 million rows/s., 3.15 GB/s.)
select count() from (
select p, min(t) from test
group by p
)
┌─count()─┐
│ 1000 │
└─────────┘
1 rows in set. Elapsed: 0.008 sec. Processed 10.00 million rows, 80.00 MB (1.26 billion rows/s., 10.11 GB/s.)
create table test (p Int64, t Int64, x String, y Int64) engine=MergeTree order by (p,x)
select count(), min(x) from (
select * from test order by p, t limit 1 by p
)
┌─count()─┬─min(x)──┐
│ 1000 │ 1000340 │
└─────────┴─────────┘
1 rows in set. Elapsed: 2.274 sec. Processed 10.00 million rows, 347.41 MB (4.40 million rows/s., 152.81 MB/s.)
select count(), min(x) from (
select p, V.1 t, V.2 x, V.3 y from (
select p, argMin((t,x,y),t) V from test
group by p)
)
┌─count()─┬─min(x)──┐
│ 1000 │ 1000340 │
└─────────┴─────────┘
1 rows in set. Elapsed: 0.144 sec. Processed 10.00 million rows, 427.41 MB (69.57 million rows/s., 2.97 GB/s.)
select count() from (
select p, min(t) from test
group by p
)
┌─count()─┐
│ 1000 │
└─────────┘
1 rows in set. Elapsed: 0.008 sec. Processed 10.00 million rows, 80.00 MB (1.25 billion rows/s., 9.96 GB/s.)
-----------------------------------------------------
ArgMin vs IN.
create table test (p Int64, t Int64, x String, y Int64) engine=MergeTree order by (p,t);
insert into test select intDiv(number,10000), rand() r, toString(r), number from numbers(100000000);
select count(), min(x) from (
select p, V.1 t, V.2 x, V.3 y from (
select p, argMin((t,x,y),t) V from test
group by p)
)
┌─count()─┬─min(x)──┐
│ 10000 │ 1000195 │
└─────────┴─────────┘
1 rows in set. Elapsed: 1.106 sec. Processed 100.00 million rows, 4.27 GB (90.44 million rows/s., 3.87 GB/s.)
select count(), min(x) from (
select * from test where (p,t) in (select p, min(t) min_t from test group by p)
)
┌─count()─┬─min(x)──┐
│ 10000 │ 1000195 │
└─────────┴─────────┘
1 rows in set. Elapsed: 2.435 sec. Processed 82.08 million rows, 2.85 GB (33.71 million rows/s., 1.17 GB/s.)
In does not want to go by sort key for 10000 groups :( .
insert into test select intDiv(number,100000), rand() r, toString(r), number from numbers(100000000);
select count(), min(x) from (
select p, V.1 t, V.2 x, V.3 y from (
select p, argMin((t,x,y),t) V from test
group by p)
)
┌─count()─┬─min(x)─┐
│ 1000 │ 100207 │
└─────────┴────────┘
1 rows in set. Elapsed: 1.115 sec. Processed 100.00 million rows, 4.27 GB (89.72 million rows/s., 3.83 GB/s.)
select count(), min(x) from (
select * from test prewhere (p,t) in (select p, min(t) min_t from test group by p)
)
┌─count()─┬─min(x)─┐
│ 1000 │ 100207 │
└─────────┴────────┘
1 rows in set. Elapsed: 1.231 sec. Processed 8.22 million rows, 131.48 MB (6.68 million rows/s., 106.84 MB/s.)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment