Skip to content

Instantly share code, notes, and snippets.

@lanwen
Last active June 3, 2021 08:54
Show Gist options
  • Save lanwen/211f5a6dc7a463bdf364099850b8efcc to your computer and use it in GitHub Desktop.
Save lanwen/211f5a6dc7a463bdf364099850b8efcc to your computer and use it in GitHub Desktop.
----
CREATE DATABASE lanwen;
DROP TABLE IF EXISTS lanwen.source;
DROP TABLE IF EXISTS lanwen.aggregate;
DROP TABLE IF EXISTS lanwen.view;
DROP TABLE IF EXISTS lanwen.anonymous;
DROP TABLE IF EXISTS lanwen.anonymous_view;
CREATE TABLE lanwen.source (
subjectId String,
eventId String,
timestamp DateTime,
eventType String,
event String
)
ENGINE = ReplacingMergeTree
PARTITION BY toYYYYMM(timestamp) ORDER BY (subjectId, timestamp, eventId);
INSERT INTO lanwen.source FORMAT Values ('1', 'id1', '2018-09-23 12:16:16', 'user/added', '{"eventType":"user/added", "name":"1"}');
INSERT INTO lanwen.source FORMAT Values ('1', 'id2', '2018-09-23 12:17:16', 'user/updated', '{"eventType":"user/updated", "name":"2"}');
SELECT * FROM lanwen.source;
CREATE MATERIALIZED VIEW lanwen.aggregate
ENGINE = AggregatingMergeTree
PARTITION BY tuple() ORDER BY (subjectId)
POPULATE AS
SELECT subjectId,
maxState(timestamp) AS stateAt,
minState(if(
eventType = 'user/added',
timestamp,
null
)) AS createdAt,
maxState(if(
eventType = 'user/updated',
timestamp,
null
)) AS updatedAt,
argMaxState(if(
visitParamHas(event, 'name'),
visitParamExtractString(event, 'name'),
null
), timestamp) AS name
FROM (
SELECT subjectId, eventType, timestamp, event
FROM lanwen.source
WHERE eventType IN ('user/added', 'user/updated')
ORDER BY timestamp
)
GROUP BY subjectId;
CREATE VIEW lanwen.view AS
SELECT subjectId,
maxMerge(stateAt) as stateAt,
minMerge(createdAt) as createdAt,
maxMerge(updatedAt) as updatedAt,
argMaxMerge(name) as name FROM lanwen.aggregate
GROUP BY subjectId;
CREATE MATERIALIZED VIEW lanwen.anonymous
ENGINE = AggregatingMergeTree()
PARTITION BY tuple() ORDER BY (subjectId)
POPULATE AS
SELECT lower(hex(SHA1(concat(subjectId, (SELECT 'salt' AS subjectSalt))))) AS subjectId,
maxState(timestamp) AS stateAt,
minState(if(
eventType = 'user/added',
timestamp,
null
)) AS createdAt,
maxState(if(
eventType = 'user/updated',
timestamp,
null
)) AS updatedAt,
argMaxState(if(
visitParamHas(event, 'name'),
lower(hex(SHA1(concat(visitParamExtractString(event, 'name'), (SELECT 'salt' AS subjectSalt))))),
null
), timestamp) AS name
FROM (
SELECT subjectId, eventType, timestamp, event
FROM lanwen.source
WHERE eventType IN ('user/added', 'user/updated')
ORDER BY timestamp
)
GROUP BY subjectId;
CREATE VIEW lanwen.anonymous_view AS
SELECT subjectId,
maxMerge(stateAt) as stateAt,
minMerge(createdAt) as createdAt,
maxMerge(updatedAt) as updatedAt,
argMaxMerge(name) as name FROM lanwen.anonymous
GROUP BY subjectId;
SELECT * FROM lanwen.anonymous_view;
INSERT INTO lanwen.source FORMAT Values ('1', 'id3', '2018-09-23 12:18:16', 'user/updated', '{"eventType":"user/updated", "name":"3"}');
SELECT count() FROM lanwen.aggregate;
SELECT * FROM lanwen.anonymous;
SELECT subjectId,
maxMerge(stateAt) as stateAt,
minMerge(createdAt) as createdAt,
maxMerge(updatedAt) as updatedAt,
argMaxMerge(name) as name FROM lanwen.aggregate
GROUP BY subjectId;
SELECT * FROM lanwen.view;
INSERT INTO lanwen.source FORMAT Values ('1', 'id4', '2018-09-23 12:19:16', 'user/updated', '{"eventType":"user/updated", "name":"4"}');
SELECT count() from lanwen.aggregate;
SELECT subjectId,
maxMerge(stateAt) as stateAt,
minMerge(createdAt) as createdAt,
maxMerge(updatedAt) as updatedAt,
argMaxMerge(name) as name FROM lanwen.aggregate
GROUP BY subjectId;
SELECT * FROM lanwen.view;
OPTIMIZE TABLE lanwen.aggregate FINAL;
SELECT count() from lanwen.aggregate;
SELECT * FROM lanwen.anonymous;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment