Skip to content

Instantly share code, notes, and snippets.

@vladaman
Last active June 16, 2023 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vladaman/300b23b4f47a31ae618506afcd3f7594 to your computer and use it in GitHub Desktop.
Save vladaman/300b23b4f47a31ae618506afcd3f7594 to your computer and use it in GitHub Desktop.
Sample table structure for MySQL Activity Schema Data modeling - https://github.com/ActivitySchema
-- Table structure for table `activity_schema` for mySQL
--
CREATE TABLE `activity_schema` (
`activity_id` varchar(255) NOT NULL COMMENT 'Unique identifier for the activity record',
`ts` timestamp NOT NULL DEFAULT current_timestamp() ON UPDATE current_timestamp() COMMENT 'Timestamp in UTC for when the activity occurred',
`customer` varchar(255) DEFAULT NULL COMMENT 'Globally unique identifier for the customer',
`activity` varchar(255) NOT NULL COMMENT 'Name of the activity',
`anonymous_customer_id` varchar(255) DEFAULT NULL COMMENT 'Unique identifier for an anonymous customer (ex. ''segment_abfb8a'')',
`feature_1` varchar(255) DEFAULT NULL COMMENT 'Activity-specific feature 1',
`feature_2` varchar(255) DEFAULT NULL COMMENT 'Activity-specific feature 2',
`feature_3` varchar(255) DEFAULT NULL COMMENT 'Activity-specific feature 3',
`revenue_impact` float NOT NULL DEFAULT 0 COMMENT 'Revenue or cost associated with the activity',
`link` varchar(255) DEFAULT NULL COMMENT 'URL associated with the activity',
`activity_occurrence` int(11) UNSIGNED NOT NULL DEFAULT 1 COMMENT 'How many times this activity has happened for this customer. Used to streamline queries.',
`activity_repeated_at` timestamp NULL DEFAULT NULL COMMENT 'The timestamp of next instance of this activity for this customer. Used to streamline queries.',
`_activity_source` varchar(255) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Activity Schema V2 for Clickhouse
SET allow_experimental_object_type = 1;
CREATE OR REPLACE TABLE test1.activity_schema_v2
(
activity_id String,
ts DateTime,
customer Nullable(String),
activity String,
anonymous_customer_id Nullable(String),
feature_json JSON,
revenue_impact Nullable(Float32),
link Nullable(String)
)
ENGINE = MergeTree PARTITION BY toYYYYMM(ts) ORDER BY ts;
-- SET input_format_import_nested_json = 1;
INSERT INTO test1.activity_schema_v2 (activity_id, ts, customer, activity, anonymous_customer_id, feature_json,
revenue_impact, link)
VALUES ('f28dfaf5-8ce7-482c-9b5e-ef84af1d995e', now(), 'customer_id_33', 'activity_some', 'anonymous_customer_id1',
'{"feature3": "value3", "feature8": "value8"}', 1.0, 'https://yahoo.com/');
select *, feature_json.feature3, toDateTime(ts, 'Europe/Prague') as ts_prague
from test1.activity_schema_v2;
-- Activity Schema V1 for Redshift
CREATE TABLE activity_schema (
activity_id varchar(255) NOT NULL,
ts timestamp NOT NULL,
customer varchar(255) DEFAULT NULL,
activity varchar(255) NOT NULL,
anonymous_customer_id varchar(255) DEFAULT NULL,
feature_1 varchar(255) DEFAULT NULL ,
feature_2 varchar(255) DEFAULT NULL,
feature_3 varchar(255) DEFAULT NULL,
revenue_impact float NOT NULL,
link varchar(1024) DEFAULT NULL,
activity_occurrence int NOT NULL DEFAULT 1,
activity_repeated_at timestamp NULL DEFAULT NULL,
_activity_source varchar(255) NOT NULL
);
COMMENT ON COLUMN activity_schema.activity_id IS 'Unique identifier for the activity record';
COMMENT ON COLUMN activity_schema.ts IS 'Timestamp in UTC for when the activity occurred';
COMMENT ON COLUMN activity_schema.customer IS 'Globally unique identifier for the customer';
COMMENT ON COLUMN activity_schema.activity IS 'Name of the activity';
COMMENT ON COLUMN activity_schema.anonymous_customer_id IS 'Unique identifier for an anonymous customer (ex. "segment_abfb8a")';
COMMENT ON COLUMN activity_schema.feature_1 IS 'Activity-specific feature 1';
COMMENT ON COLUMN activity_schema.feature_1 IS 'Activity-specific feature 2';
COMMENT ON COLUMN activity_schema.feature_1 IS 'Activity-specific feature 3';
COMMENT ON COLUMN activity_schema.revenue_impact IS 'Revenue or cost associated with the activity';
COMMENT ON COLUMN activity_schema.link IS 'URL associated with the activity';
COMMENT ON COLUMN activity_schema.activity_occurrence IS 'How many times this activity has happened for this customer. Used to streamline queries.';
COMMENT ON COLUMN activity_schema.activity_repeated_at IS 'The timestamp of next instance of this activity for this customer. Used to streamline queries.';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment