Last active
January 28, 2020 14:30
-
-
Save mihkell/2fe58500ada3f9106dcd894bff896f53 to your computer and use it in GitHub Desktop.
Generate PostgresSQL testing data. This will create 50k accounts with total of 140 million transactions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Used https://vnegrisolo.github.io/postgresql/generate-fake-data-using-sql as template to create this simplified solution. | |
-- psql -h localhost -U postgres | |
-- CREATE DATABASE test_db; | |
-- \c test_db | |
CREATE TABLE transaction ( | |
id serial primary key, | |
amount integer, | |
account varchar, | |
date timestamp | |
); | |
CREATE TABLE account ( | |
id serial primary key, | |
account varchar | |
); | |
-- generate accounts | |
INSERT INTO account(account) | |
SELECT | |
'account_' || seq AS account | |
FROM GENERATE_SERIES(1, 50000) seq; | |
-- generate transaction | |
CREATE or REPLACE FUNCTION generate_transactions() RETURNS integer AS | |
$$ | |
DECLARE | |
max_account INTEGER; | |
account_account varchar; | |
account_id INTEGER; | |
BEGIN | |
RAISE NOTICE 'Generating transactions...'; | |
SELECT ((SELECT max(a.id) from account a))::int into max_account; | |
FOR tempseq IN 1..140000000 | |
LOOP | |
select floor((RANDOM() * 49999) + 1) into account_id; | |
select a.account from "account" a WHERE a.id = account_id into account_account; | |
INSERT INTO transaction(amount, account, date) | |
values (RANDOM() * 10, | |
account_account, | |
(select NOW() + (random() * (NOW() + '90 days' - NOW())) + '30 days')); | |
END LOOP; | |
RAISE NOTICE 'Done generating transactions.'; | |
RETURN 1; | |
END; | |
$$ LANGUAGE plpgsql; | |
SELECT generate_transactions(); | |
-- SOME SELECTS TESTING SPEEDS. Without index. | |
-- 1m 26sec for 140mln | |
select count(id), account | |
from transaction | |
group by account; | |
-- 21 s 422 ms | |
select count(id) | |
from transaction | |
where account = 'account_28876' | |
group by account; | |
-- 20 s 600 ms | |
select sum(amount), account | |
from transaction | |
where account = 'account_28876' | |
group by account; | |
-- 41 m 33 s 523 ms | |
create index on "transaction" (account); | |
-- SOME SELECTS TESTING SPEEDS. With index. | |
-- 529 ms first time slow | |
select sum(amount) | |
from transaction | |
where account = 'account_25576'; | |
-- 10 ms second time | |
select sum(amount) | |
from transaction | |
where account = 'account_25576'; | |
-- insert new transactions | |
INSERT INTO transaction(amount, account, date) | |
values (RANDOM() * 10, | |
'account_25576', | |
(select NOW() + (random() * (NOW() + '90 days' - NOW())) + '30 days')); | |
-- 13 ms after adding new transactions in the middle it's still fast. | |
select sum(amount) | |
from transaction | |
where account = 'account_25576'; | |
-- 8 - 12 ms | |
select max(id), account | |
from transaction | |
where account = 'account_28876' | |
group by account; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment