Skip to content

Instantly share code, notes, and snippets.

@mihkell
Last active January 28, 2020 14:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mihkell/2fe58500ada3f9106dcd894bff896f53 to your computer and use it in GitHub Desktop.
Save mihkell/2fe58500ada3f9106dcd894bff896f53 to your computer and use it in GitHub Desktop.
Generate PostgresSQL testing data. This will create 50k accounts with total of 140 million transactions.
-- Used https://vnegrisolo.github.io/postgresql/generate-fake-data-using-sql as template to create this simplified solution.
-- psql -h localhost -U postgres
-- CREATE DATABASE test_db;
-- \c test_db
CREATE TABLE transaction (
id serial primary key,
amount integer,
account varchar,
date timestamp
);
CREATE TABLE account (
id serial primary key,
account varchar
);
-- generate accounts
INSERT INTO account(account)
SELECT
'account_' || seq AS account
FROM GENERATE_SERIES(1, 50000) seq;
-- generate transaction
CREATE or REPLACE FUNCTION generate_transactions() RETURNS integer AS
$$
DECLARE
max_account INTEGER;
account_account varchar;
account_id INTEGER;
BEGIN
RAISE NOTICE 'Generating transactions...';
SELECT ((SELECT max(a.id) from account a))::int into max_account;
FOR tempseq IN 1..140000000
LOOP
select floor((RANDOM() * 49999) + 1) into account_id;
select a.account from "account" a WHERE a.id = account_id into account_account;
INSERT INTO transaction(amount, account, date)
values (RANDOM() * 10,
account_account,
(select NOW() + (random() * (NOW() + '90 days' - NOW())) + '30 days'));
END LOOP;
RAISE NOTICE 'Done generating transactions.';
RETURN 1;
END;
$$ LANGUAGE plpgsql;
SELECT generate_transactions();
-- SOME SELECTS TESTING SPEEDS. Without index.
-- 1m 26sec for 140mln
select count(id), account
from transaction
group by account;
-- 21 s 422 ms
select count(id)
from transaction
where account = 'account_28876'
group by account;
-- 20 s 600 ms
select sum(amount), account
from transaction
where account = 'account_28876'
group by account;
-- 41 m 33 s 523 ms
create index on "transaction" (account);
-- SOME SELECTS TESTING SPEEDS. With index.
-- 529 ms first time slow
select sum(amount)
from transaction
where account = 'account_25576';
-- 10 ms second time
select sum(amount)
from transaction
where account = 'account_25576';
-- insert new transactions
INSERT INTO transaction(amount, account, date)
values (RANDOM() * 10,
'account_25576',
(select NOW() + (random() * (NOW() + '90 days' - NOW())) + '30 days'));
-- 13 ms after adding new transactions in the middle it's still fast.
select sum(amount)
from transaction
where account = 'account_25576';
-- 8 - 12 ms
select max(id), account
from transaction
where account = 'account_28876'
group by account;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment