-
-
Save nikvas0/a6d66e833c0adaa42762fdf77a026e2b to your computer and use it in GitHub Desktop.
clickhouse bloom filters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################################################################################### | |
# URLDomain | |
##################################################################################################################### | |
# NO INDEX | |
ThinkPad-E570 :) select uniq(URL) from datasets.hits_v1 where URLDomain like 'yazana.%' | |
SELECT uniq(URL) | |
FROM datasets.hits_v1 | |
WHERE URLDomain LIKE 'yazana.%' | |
┌─uniq(URL)─┐ | |
│ 380 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.079 sec. Processed 8.87 million rows, 222.52 MB (111.87 million rows/s., 2.81 GB/s.) | |
# NGRAM INDEX | |
# alter table hits_3gram add index domain (URLDomain) TYPE ngrambf(3, 512, 2, 0) GRANULARITY 1 | |
ThinkPad-E570 :) select uniq(URL) from hits_3gram where URLDomain like 'yazana.%' | |
SELECT uniq(URL) | |
FROM hits_3gram | |
WHERE URLDomain LIKE 'yazana.%' | |
┌─uniq(URL)─┐ | |
│ 380 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.026 sec. Processed 557.06 thousand rows, 14.86 MB (21.50 million rows/s., 573.35 MB/s.) | |
# TOKENBF INDEX | |
# :) alter table hits_tokenbf add index domain (URLDomain) TYPE tokenbf(512, 2, 0) GRANULARITY 1 | |
ThinkPad-E570 :) select uniq(URL) from hits_tokenbf where URLDomain like 'yazana.%' | |
SELECT uniq(URL) | |
FROM hits_tokenbf | |
WHERE URLDomain LIKE 'yazana.%' | |
┌─uniq(URL)─┐ | |
│ 380 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.024 sec. Processed 638.98 thousand rows, 16.78 MB (26.94 million rows/s., 707.24 MB/s.) | |
##################################################################################################################### | |
# SearchPhrase | |
##################################################################################################################### | |
# NO INDEX | |
ThinkPad-E570 :) select uniq(URL) from datasets.hits_v1 where SearchPhrase like '%яндекс%' | |
SELECT uniq(URL) | |
FROM datasets.hits_v1 | |
WHERE SearchPhrase LIKE '%яндекс%' | |
┌─uniq(URL)─┐ | |
│ 1512 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.197 sec. Processed 8.87 million rows, 112.90 MB (45.05 million rows/s., 573.20 MB/s.) | |
# NGRAM INDEX | |
# TYPE ngrambf(3, 512, 2, 0) GRANULARITY 1 | |
ThinkPad-E570 :) select uniq(URL) from hits_3gram where SearchPhrase like '%яндекс%' | |
SELECT uniq(URL) | |
FROM hits_3gram | |
WHERE SearchPhrase LIKE '%яндекс%' | |
┌─uniq(URL)─┐ | |
│ 1512 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.166 sec. Processed 5.17 million rows, 76.36 MB (31.10 million rows/s., 459.39 MB/s.) | |
##################################################################################################################### | |
# Title | |
##################################################################################################################### | |
# NO INDEX | |
ThinkPad-E570 :) select uniq(URL) from datasets.hits_v1 where Title like '% Яндекс.Видео %' | |
SELECT uniq(URL) | |
FROM datasets.hits_v1 | |
WHERE Title LIKE '% Яндекс.Видео %' | |
┌─uniq(URL)─┐ | |
│ 8464 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.472 sec. Processed 8.87 million rows, 768.66 MB (18.80 million rows/s., 1.63 GB/s.) | |
# NGRAM INDEX | |
ThinkPad-E570 :) select uniq(URL) from hits_3gram where Title like '% Яндекс.Видео %' | |
SELECT uniq(URL) | |
FROM hits_3gram | |
WHERE Title LIKE '% Яндекс.Видео %' | |
┌─uniq(URL)─┐ | |
│ 8464 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.507 sec. Processed 8.18 million rows, 754.69 MB (16.14 million rows/s., 1.49 GB/s.) | |
# TOKENBF INDEX (fullscan, only ascii) | |
ThinkPad-E570 :) select uniq(URL) from hits_tokenbf where Title like '% Яндекс.Видео %' | |
SELECT uniq(URL) | |
FROM hits_tokenbf | |
WHERE Title LIKE '% Яндекс.Видео %' | |
┌─uniq(URL)─┐ | |
│ 8464 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.484 sec. Processed 8.87 million rows, 768.66 MB (18.33 million rows/s., 1.59 GB/s.) | |
# But for equals(,) | |
ThinkPad-E570 :) select uniq(URL) from hits_3gram where Title = 'Яндекс.Видео' | |
SELECT uniq(URL) | |
FROM hits_3gram | |
WHERE Title = 'Яндекс.Видео' | |
┌─uniq(URL)─┐ | |
│ 131 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.225 sec. Processed 8.22 million rows, 753.53 MB (36.49 million rows/s., 3.35 GB/s.) | |
ThinkPad-E570 :) select uniq(URL) from datasets.hits_v1 where Title = 'Яндекс.Видео' | |
SELECT uniq(URL) | |
FROM datasets.hits_v1 | |
WHERE Title = 'Яндекс.Видео' | |
┌─uniq(URL)─┐ | |
│ 131 │ | |
└───────────┘ | |
1 rows in set. Elapsed: 0.225 sec. Processed 8.87 million rows, 766.96 MB (39.43 million rows/s., 3.41 GB/s.) | |
ThinkPad-E570 :) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment