Skip to content

Instantly share code, notes, and snippets.

@tom-clickhouse
Created April 19, 2023 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tom-clickhouse/5d391b45a1c19948ed6d43c87cf7e788 to your computer and use it in GitHub Desktop.
Save tom-clickhouse/5d391b45a1c19948ed6d43c87cf7e788 to your computer and use it in GitHub Desktop.
CREATE DATABASE imdb_large;
CREATE TABLE imdb_large.actors
(
id UInt32,
first_name String,
last_name String,
gender FixedString(1)
) ENGINE = MergeTree ORDER BY (id, first_name, last_name, gender);
INSERT INTO imdb_large.actors WITH
(
SELECT groupArrayDistinct(first_name)
FROM imdb.actors
) AS first_names,
(
SELECT groupArrayDistinct(last_name)
FROM imdb.actors
) AS last_names,
(
SELECT groupArrayDistinct(gender)
FROM imdb.actors
) AS genders
SELECT
number AS id,
first_names[toInt32(floor(randUniform(1, length(first_names))))] AS first_name,
last_names[toInt32(floor(randUniform(1, length(last_names))))] AS last_name,
genders[toInt8(floor(randUniform(1, length(genders) + 1)))] AS gender
FROM numbers(1_000_000);
CREATE TABLE imdb_large.roles
(
created_at DateTime DEFAULT now(),
actor_id UInt32,
movie_id UInt32,
role String
) ENGINE = MergeTree ORDER BY (actor_id, movie_id);
INSERT INTO imdb_large.roles(actor_id, movie_id, role)
WITH
(
SELECT groupArrayDistinct(1_000_000)(role)
FROM imdb.roles
) AS roles,
(
SELECT groupArrayDistinct(movie_id)
FROM imdb.genres
) AS movies
SELECT
(rand() mod 1_000_000) AS actor_id,
movies[toInt32(floor(randUniform(1, length(movies))))] AS movie_id,
roles[toInt32(floor(randUniform(1, length(roles))))] AS role
FROM numbers(100_000_000);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment