Denny Crane den-crane

## gist:6eff375752a236a456e1b3dc2ca7db62
DROP TABLE IF EXISTS requests;

CREATE TABLE requests (
  request_date Date,
  request_time DateTime,
  response_time Int,
  request_uri String)
ENGINE  = MergeTree(request_date, (request_time, request_uri), 8192);


## AggregatingMergeTree-event-enrichment
CREATE TABLE states_raw(
    d date,
    uid UInt64,
    first_name String,
    last_name String,
    modification_timestamp_mcs DateTime64(3) default now64(3)
) ENGINE = Null;

CREATE TABLE final_states_by_month(
    d date,

## ttl_merge_of_wide_rows_mem_usage.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                den-crane
                / ttl_merge_of_wide_rows_mem_usage.md
            
            
              Last active
              September 6, 2023 22:05
            
              
                ttl_merge_of_wide_rows_mem_usage
              
          
    CREATE TABLE wide_rows2(`A` Int64, `D` Date, `S` String)
ENGINE = MergeTree PARTITION BY toYYYYMM(D) ORDER BY A
settings merge_with_ttl_timeout=300, materialize_ttl_recalculate_only = 1;

insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%999), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%899), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%799), range(2000)) from numbers(1e5);
insert into wide_rows2 select number, toDate('2020-01-01')+ number%20, arrayMap(i-> cityHash64((number*i)%699), range(2000)) from numbers(1e5);
set materialize_ttl_after_modify=0;

  
## AggregatingMergeTree-groupArrayState

-----------------------    SimpleAggregateFunction + groupArrayArray  ----------------------------

drop table if exists states_raw;
drop table if exists final_states_by_month;
drop table if exists final_states_by_month_mv;

CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupArrayArray, Array(String)))
ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);

## ch_vs_sr.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                den-crane
                / ch_vs_sr.md
            
            
              Last active
              July 20, 2023 21:54
            
              
                ch vs sr
              
          
    CH 1-node
SELECT
    count(),
    min(paramkey),
    max(paramkey),
    min(ind),
    max(ind)
FROM data_table
┌───count()─┬────────min(paramkey)─┬───────max(paramkey)─┬─min(ind)─┬──max(ind)─┐

  
## gist:976d8213b15c8ef5b3f48c052402a9d1
https://github.com/ClickHouse/ClickHouse/issues/47092#issuecomment-1485052499

23.3.8.21
<default>
   <access_management>1</access_management>
</default>

create user foo;
grant all on *.* to foo;

## MinMax.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                den-crane
                / MinMax.md
            
            
              Last active
              June 6, 2023 18:38
            
              
                Using MinMax skip index to improve partition reading 
              
          
    create table test(tenant_id UInt32,  ts1 DateTime, timestamp DateTime)
engine=MergeTree
partition by toDate(ts1)
PRIMARY KEY (tenant_id, toStartOfHour(timestamp))
ORDER BY (tenant_id, toStartOfHour(timestamp),  timestamp)
as select 1, now() - number/10 x, x from numbers(1e8);

insert into test select number%1000, now() - number/10 x, 0 from numbers(1e8);

  
## ingest.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                den-crane
                / ingest.md
            
            
              Created
              April 27, 2023 12:17
            
              
                ingest data from file using clickhouse-http-java-client
              
          
    Test data

clickhouse-client -q "select number A, now() B, 'x' C from numbers(1e6) format Parquet" > test.parquet

Maven

    <dependencies>

  
## djoin.sql
-- in different versions of CH it works differently, and does not work at all with circle replication


drop table t1 on cluster segmented;
drop table t2 on cluster segmented;
create table t1 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t2 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
create table t1d on cluster segmented as t1 Engine=Distributed(segmented, currentDatabase(), t1, A);
create table t2d on cluster segmented as t2 Engine=Distributed(segmented, currentDatabase(), t2, A);

## total_over_group.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                den-crane
                / total_over_group.md
            
            
              Created
              March 17, 2023 18:20
            
              
                Average / Total sum over group
              
          
    create table A (pk_col Int64, col1 Float64, col4 Float64, day Date) Engine=Memory
as select * from values( 
            (1, 3, 4, today()), (1, 1, 4, today()), (1, 2, 0, today()-1), 
            (2, 3, 4, today()), (2, 3, 0, today()-1) );
Window function
	DROP TABLE IF EXISTS requests;

	CREATE TABLE requests (
	request_date Date,
	request_time DateTime,
	response_time Int,
	request_uri String)
	ENGINE = MergeTree(request_date, (request_time, request_uri), 8192);
	CREATE TABLE states_raw(
	d date,
	uid UInt64,
	first_name String,
	last_name String,
	modification_timestamp_mcs DateTime64(3) default now64(3)
	) ENGINE = Null;

	CREATE TABLE final_states_by_month(
	d date,

	----------------------- SimpleAggregateFunction + groupArrayArray ----------------------------

	drop table if exists states_raw;
	drop table if exists final_states_by_month;
	drop table if exists final_states_by_month_mv;

	CREATE TABLE states_raw(d date, uid UInt64, s String) ENGINE = Null;
	CREATE TABLE final_states_by_month(d date, uid UInt64, a_state SimpleAggregateFunction(groupArrayArray, Array(String)))
	ENGINE = AggregatingMergeTree PARTITION BY toYYYYMM(d) ORDER BY (uid);
	https://github.com/ClickHouse/ClickHouse/issues/47092#issuecomment-1485052499

	23.3.8.21
	<default>
	<access_management>1</access_management>
	</default>

	create user foo;
	grant all on . to foo;
	-- in different versions of CH it works differently, and does not work at all with circle replication


	drop table t1 on cluster segmented;
	drop table t2 on cluster segmented;
	create table t1 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
	create table t2 on cluster segmented (A Int64) Engine=MergeTree order by tuple();
	create table t1d on cluster segmented as t1 Engine=Distributed(segmented, currentDatabase(), t1, A);
	create table t2d on cluster segmented as t2 Engine=Distributed(segmented, currentDatabase(), t2, A);