Skip to content

Instantly share code, notes, and snippets.

@FranckPachot
Last active April 24, 2023 11:32
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FranckPachot/828ea961d7f09b7cf5582a3e84faaf86 to your computer and use it in GitHub Desktop.
Save FranckPachot/828ea961d7f09b7cf5582a3e84faaf86 to your computer and use it in GitHub Desktop.
For the next time you hear that "Joins Don't Scale": Joining to a 7M rows table to return 20K rows with text search filters in 50 milliseconds on @yugabyte​DB and that scales as the plan doesn't depend on the table size😎 SQL databases rocks 🚀
-- load names
\! wget -cO/var/tmp/baby-names.csv "https://github.com/hadley/data-baby-names/blob/master/baby-names.csv?raw=true"
drop table if exists baby_names;
create table baby_names ( year int , name text, percent float, sex text, primary key (name,year, sex));
\copy baby_names from '/var/tmp/baby-names.csv' with ( skip 1, format csv );
-- joins scale
drop table if exists users,messages cascade;
create table users (
primary key (user_id)
, user_id bigint generated always as identity (cache 100)
, username text
);
create index users_username on users( upper(username) asc , user_id );
create table messages (
primary key (user_id, message_id)
, user_id bigint references users
, message_id bigint generated always as identity (cache 100)
, send_time timestamptz default now()
, message text
);
insert into users (username) select distinct name from baby_names;
create extension if not exists orafce;
prepare gen_messages(int) as
insert into messages (user_id, send_time, message)
select user_id , now() - generate_series * interval '1 minute' , dbms_random.string('p',100)
from users , generate_series(1,$1)
;
execute gen_messages(10);
analyze messages, users;
set yb_enable_optimizer_statistics to on;
set yb_bnl_batch_size to 1024;
prepare get_messages(text,int) as
select username, send_time from users join messages using(user_id)
where upper(username) like upper($1)
and send_time > ( now() - $2 * interval '1 days' )
;
execute gen_messages(10);
execute gen_messages(100);
execute gen_messages(1000);
explain (analyze, costs off)
execute get_messages('Fra%',3);
analyze messages, users;
select relname, reltuples from pg_class where relname in ('users','messages');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment