The first 2 characters of the UUID value comes from the MD5 hash of the backend pid modulo 3. The next 2 characters are the MD5 hash of the concatenation of the current year and week number. This value is, of course, static over a week. The remaining of the UUID value comes from the MD5 of a random value and the current time at a precision of 1us. The third field is prefixed with a “4” to indicate it is a version 4 UUID type.
Last active
June 21, 2020 09:26
-
-
Save pyramation/302c5670b5604258e294bcf8e627ae7f to your computer and use it in GitHub Desktop.
pseudo ordered uuids w modulo pid prefix
https://twitter.com/FranckPachot/status/1218997202732843008
Yes, scattered random UUID means: more disk space, more I/O, more WAL, more cache misses, more RAM usage,... Sequential are better but may put contention on one block. Partitioning them a little (like prefixing with a modulo on pg_backend_pid) is sufficient to scale-up if needed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- this one has 2 chars that represent a modulo on the pid (some variation, chose 3 for now) | |
-- the 2nd chars are the year and month md5 (psudo order) | |
DROP FUNCTION IF EXISTS f_pseudo_order_mod_pid_uuid(); | |
CREATE FUNCTION f_pseudo_order_mod_pid_uuid () | |
RETURNS uuid | |
AS $$ | |
DECLARE | |
new_uuid char(36); | |
md5_str char(32); | |
BEGIN | |
md5_str := md5(concat(random(), now())); | |
new_uuid := concat( | |
LEFT (md5(mod(pg_backend_pid(), 3)::text), 2), | |
LEFT (md5(concat(extract(year FROM now()), extract(week FROM now()))), 2), | |
LEFT (md5_str, 4), '-', substring(md5_str, 5, 4), '-4', substring(md5_str, 9, 3), '-', substring(md5_str, 13, 4), '-', substring(md5_str, 17, 12) | |
); | |
RETURN new_uuid; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
DROP FUNCTION IF EXISTS f_pseudo_order_mod_pid_uuids(); | |
CREATE FUNCTION f_pseudo_order_mod_pid_uuids () | |
RETURNS SETOF uuid | |
AS $$ | |
BEGIN | |
RETURN QUERY SELECT f_pseudo_order_mod_pid_uuid() FROM generate_series(1,10,1); | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
SELECT | |
* | |
FROM | |
f_pseudo_order_mod_pid_uuids (); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- first 4 chars represent an md5 hash of the pid and year hash ... | |
-- this could probably be improved. | |
DROP FUNCTION IF EXISTS f_pseudo_order_mod_pid_uuid(); | |
CREATE FUNCTION f_pseudo_order_mod_pid_uuid () | |
RETURNS uuid | |
AS $$ | |
DECLARE | |
new_uuid char(36); | |
md5_str char(32); | |
pid int; | |
BEGIN | |
md5_str := md5(concat(random(), now())); | |
pid = mod(pg_backend_pid(), 3); | |
new_uuid := concat( | |
LEFT (md5(concat(pid, extract(year FROM now()))), 4), | |
LEFT (md5_str, 4), '-', substring(md5_str, 5, 4), '-4', substring(md5_str, 9, 3), '-', substring(md5_str, 13, 4), '-', substring(md5_str, 17, 12) | |
); | |
RETURN new_uuid; | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
DROP FUNCTION IF EXISTS f_pseudo_order_mod_pid_uuids(); | |
CREATE FUNCTION f_pseudo_order_mod_pid_uuids () | |
RETURNS SETOF uuid | |
AS $$ | |
BEGIN | |
RETURN QUERY SELECT f_pseudo_order_mod_pid_uuid() FROM generate_series(1,10,1); | |
END; | |
$$ | |
LANGUAGE plpgsql; | |
SELECT | |
* | |
FROM | |
f_pseudo_order_mod_pid_uuids (); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment