Skip to content

Instantly share code, notes, and snippets.

@tom-clickhouse
Created June 22, 2023 17:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tom-clickhouse/67945d368093fc468d3d6eb7b6511202 to your computer and use it in GitHub Desktop.
Save tom-clickhouse/67945d368093fc468d3d6eb7b6511202 to your computer and use it in GitHub Desktop.
CREATE DATABASE imdb_xlarge;
CREATE TABLE imdb_xlarge.actors
(
id UInt32,
first_name String,
last_name String,
gender FixedString(1)
) ENGINE = MergeTree ORDER BY (id, first_name, last_name, gender);
INSERT INTO imdb_xlarge.actors WITH
(
SELECT groupArrayDistinct(first_name)
FROM imdb.actors
) AS first_names,
(
SELECT groupArrayDistinct(last_name)
FROM imdb.actors
) AS last_names,
(
SELECT groupArrayDistinct(gender)
FROM imdb.actors
) AS genders
SELECT
number AS id,
first_names[toInt32(floor(randUniform(1, length(first_names))))] AS first_name,
last_names[toInt32(floor(randUniform(1, length(last_names))))] AS last_name,
genders[toInt8(floor(randUniform(1, length(genders) + 1)))] AS gender
FROM numbers(100_000_000)
CREATE TABLE imdb_xlarge.roles
(
created_at DateTime DEFAULT now(),
actor_id UInt32,
movie_id UInt32,
role String
) ENGINE = MergeTree ORDER BY (actor_id, movie_id);
INSERT INTO imdb_xlarge.roles(actor_id, movie_id, role)
WITH
(
SELECT groupArrayDistinct(1_000_000)(role)
FROM imdb.roles
) AS roles,
(
SELECT groupArrayDistinct(movie_id)
FROM imdb.genres
) AS movies
SELECT
(rand() mod 100_000_000) AS actor_id,
movies[toInt32(floor(randUniform(1, length(movies))))] AS movie_id,
roles[toInt32(floor(randUniform(1, length(roles))))] AS role
FROM numbers(1_000_000_000);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment