Run via Docker

docker run -d \
    -e POSTGRES_PASSWORD=mysecretpassword \
    -e POSTGRES_USER=root \
    -e POSTGRES_DB=mydb \
    -e PGDATA=/var/lib/postgresql/data/pgdata \
    -v ${HOME}/pgdata:/var/lib/postgresql/data \
    --name postgres \
    postgres
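
A quick check that the container came up with the credentials above:

docker exec -it postgres psql -U root -d mydb -c 'select version();'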

Dump & Restore

Script

scripts/postgres.sh

#!/bin/sh
postgres_user=admin
postgres_password=123456
postgres_db=maindb
postgres_host=ezkeep_db
now=$(date +%Y-%m-%d_%H-%M)
bkp_dir=/backup

export PGPASSWORD=${postgres_password}

dump() {
    pg_dump \
        -U ${postgres_user} \
        -h ${postgres_host} \
        -Fc -v \
        --no-owner --no-acl \
        ${postgres_db} > ${bkp_dir}/${now}_ezkeep.dump
}

restore() {
    filename=${1}
    pg_restore \
        -U ${postgres_user} \
        -h ${postgres_host} \
        -d ${postgres_db} \
        -c -v \
        ${bkp_dir}/${filename}
}

case "${1}" in
    dump) dump ;;
    restore) restore "${2}" ;;
    *) dump ;;
esac
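
Usage (the dump filename is illustrative):

./postgres.sh dump
./postgres.sh restore 2022-01-01_12-00_ezkeep.dump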

Makefile

chmod:
    @chmod +x $(scripts_dir)/*

migrate_postgres: chmod
    @docker exec postgres bash -c "DB_ENGINE=postgres DATABASE_URL='$(postgres_url)' MIGRATIONS_DIR=/migrations bash /scripts/sql.sh apply"

postgres: drop_postgres
    @docker run -d --name postgres \
        -m 2G \
        --hostname ezkeep_db \
        --network ezkeep \
        -p 5432:5432 \
        -e TZ=America/Sao_Paulo \
        -e POSTGRES_USER=$(postgres_user) \
        -e POSTGRES_PASSWORD=$(postgres_password) \
        -e POSTGRES_DB=$(postgres_db) \
        -e ADDITIONAL_DB=root \
        -v $(scripts_dir):/scripts \
        -v $(CURDIR)/migrations/postgres:/migrations \
        gustavohenrique/postgres:14-alpine \
    && echo "Waiting for 3s..." \
    && sleep 3 \
    && docker exec postgres sh -c "PGPASSWORD=$(postgres_password) psql -U $(postgres_user) -d $(postgres_db) < /migrations/schema.sql"
    @$(MAKE) -s migrate db=postgres

drop_postgres:
    @docker rm -f postgres 2>/dev/null || exit 0

psql:
    @docker exec -it postgres sh -c "PGPASSWORD=$(postgres_password) psql -U $(postgres_user) -d $(postgres_db) $(pgoptions)"

postgres_restore:
    @docker run --rm \
        -v $(CURDIR)/backup:/backup \
        --network ezkeep \
        --mount type=bind,source=$(scripts_dir)/postgres.sh,target=/backup.sh \
        gustavohenrique/postgres:14-alpine sh -c "/backup.sh restore $(file)"

postgres_dump:
    @docker run --rm \
        -v $(CURDIR)/backup:/backup \
        --network ezkeep \
        --mount type=bind,source=$(scripts_dir)/postgres.sh,target=/backup.sh \
        gustavohenrique/postgres:14-alpine sh -c "/backup.sh dump"
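
Typical usage of the targets above (the dump filename is illustrative):

make postgres
make postgres_dump
make postgres_restore file=2022-01-01_12-00_ezkeep.dump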

Binary mode:

export PGPASSWORD="passwd"
# simple
pg_dump \
  -U ${USER} \
  -h ${HOST} \
  -Fc -v \
  --no-owner --no-acl \
  ${DATABASE} > ${DATABASE}.dump

# complex
pg_dump \
  -U ${USER} \
  -h ${HOST} \
  -Fc \
  -c -O -x \
  --superuser root \
  -n public \
  --no-security-labels \
  --if-exists \
  --disable-triggers \
  ${DATABASE} > ${DATABASE}.dump

# simple
restore() {
  pg_restore \
    -U ${USER} \
    -h ${HOST} \
    -d ${DATABASE} \
    -c -v \
    ${DATABASE}.dump
}

# complex
restore() {
  pg_restore \
    -U ${USER} \
    -h ${HOST} \
    -d ${DATABASE} \
    --no-acl \
    --no-owner \
    -n public \
    --no-comments \
    --no-tablespaces \
    ${DATABASE}.dump
}

Text SQL:

pg_dump \
  -U ${USER} \
  -h ${HOST} \
  -O -x -c \
  --inserts \
  --no-security-labels \
  --if-exists \
  --disable-triggers \
  ${DATABASE} > ${DATABASE}.sql

# restore
psql -U ${USER} -h ${HOST} -d ${DATABASE} < "${DATABASE}.sql"
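
A hedged variant that stops at the first error and runs the whole restore in a single transaction (both are standard psql options):

psql -U ${USER} -h ${HOST} -d ${DATABASE} -v ON_ERROR_STOP=1 --single-transaction -f "${DATABASE}.sql"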

User

-- admin
CREATE USER admin WITH ENCRYPTED PASSWORD 'strongpass';
ALTER USER admin WITH SUPERUSER;

-- readonly
CREATE ROLE readonly;
GRANT USAGE ON SCHEMA public TO readonly;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO readonly;
CREATE USER reader WITH PASSWORD 'strongpass';
GRANT readonly TO reader;
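
Note that GRANT ... ON ALL TABLES/SEQUENCES only covers objects that already exist. A sketch to also cover future objects, assuming new tables are created by the admin user:

-- make the grants apply to tables/sequences created later by admin
ALTER DEFAULT PRIVILEGES FOR ROLE admin IN SCHEMA public GRANT SELECT ON TABLES TO readonly;
ALTER DEFAULT PRIVILEGES FOR ROLE admin IN SCHEMA public GRANT SELECT ON SEQUENCES TO readonly;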

-- undo
REVOKE readonly FROM reader;
REVOKE USAGE ON SCHEMA public FROM readonly;
REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA public FROM readonly;
REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM readonly;
REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM readonly;
DROP ROLE readonly;

Queries

JSONB

-- charges = [{"status": "paid"}]
select charges #> '{0,"status"}' from orders;

-- payments = [{"credit_card": { "card": { "id": "card_1" }}}]
update recurrences set payments = jsonb_set(payments, '{0,credit_card,card,id}', '"card_zJgywDzI1IZLE6PK"', false);
update recurrences set payments = jsonb_insert(payments, '{0,amount}', to_jsonb(amount)) where status <> 'paid';

UPDATE purchases SET items_purchased = items_purchased || '{"name": "LG Ultrawide Monitor"}'::jsonb;
UPDATE purchases SET items_purchased = COALESCE(items_purchased, '[]'::jsonb) || '{"name": "LG Ultrawide Monitor"}'::jsonb;
UPDATE purchases SET items_purchased = items_purchased - 1; -- remove the element at index 1

UPDATE recurrences SET card_id = s.card_id FROM (SELECT id, cards->0->'id' AS card_id FROM recurrences) AS s WHERE recurrences.id = s.id;
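
To read one of these nested values back as text, the #>> operator extracts by path:

select payments #>> '{0,credit_card,card,id}' from recurrences;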

Duplicated rows:

select id, order_number, status
from recurrences t1
where (
  select count(*)
  from recurrences t2
  where t1.order_id = t2.order_id and t1.due_at = t2.due_at
) > 1;
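
A sketch for deleting those duplicates while keeping one row per (order_id, due_at) pair, using the system column ctid to tell otherwise identical rows apart:

delete from recurrences t1
using recurrences t2
where t1.ctid < t2.ctid
  and t1.order_id = t2.order_id
  and t1.due_at = t2.due_at;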

Insert a million rows

CREATE EXTENSION IF NOT EXISTS pgcrypto;
insert into tags (title)
select encode(digest(concat(current_timestamp::text, gen_random_bytes(64)::text), 'sha512'), 'hex')
from generate_series(1, 1000000);

Create tables:

\timing
create table USERS (USER_ID bigserial primary key, FIRST_NAME text, LAST_NAME text);
create table ORDERS (ORDER_ID bigserial primary key, ORDER_DATE timestamp, AMOUNT numeric(10,2), USER_ID bigint, DESCRIPTION text);

create index ORDERS_BY_USER on ORDERS(USER_ID);

Generate 10,000 users in the USERS table with random names (a 100 × 100 cross join):

insert into USERS (FIRST_NAME, LAST_NAME)
with random_words as (
  select generate_series as id,
         translate(md5(random()::text), '-0123456789', 'aeioughij') as word
  from generate_series(1, 100)
)
select words1.word, words2.word
from random_words words1
cross join random_words words2
order by words1.id + words2.id;

This generates one million orders: 100 orders for each user, spread over the last year. You can play with the numbers to generate more and see how it scales. This is fast: about 5 seconds to generate 1 million orders here.

insert into ORDERS ( ORDER_DATE, AMOUNT, USER_ID, DESCRIPTION)
 with
  random_amounts as (
      ------------> this generates 10 random order amounts
  select
   1e6*random() as AMOUNT
  from generate_series(1,10)
 )
 ,random_dates as (
      ------------> this generates 10 random order dates
  select
   now() - random() * interval '1 year' as ORDER_DATE
  from generate_series(1,10)
 )
select ORDER_DATE, AMOUNT, USER_ID, md5(random()::text) DESCRIPTION from
  random_dates
 cross join
  random_amounts
 cross join
  users
 order by ORDER_DATE
      ------------> I sort this by date because that's what happens in real life. Clustering by users would not be a fair test.
;

I've run a manual VACUUM to get a reproducible test case rather than relying on autovacuum kicking in after loading those tables.

vacuum analyze;

Here are the sizes of my tables: the 1 million orders take 104MB and the 10 thousand users take 1MB:

select relkind, relname, reltuples, relpages,
       lpad(pg_size_pretty(relpages::bigint * 8 * 1024), 20) as "relpages * 8k"
from pg_class
natural join (select oid as relnamespace, nspname from pg_namespace) nsp
where nspname = 'public'
order by relpages;

Execute the query:

select order_month,sum(amount),count(*)
from
  (
  select date_trunc('month',ORDER_DATE) order_month, USER_ID, AMOUNT
  from ORDERS join USERS using(USER_ID)
  ) user_orders
where USER_ID=42
group by order_month
;

Explain the execution:

explain (analyze,verbose,costs,buffers)
select order_month,sum(amount)
from
  (
  select date_trunc('month',ORDER_DATE) order_month, USER_ID, AMOUNT
  from ORDERS join USERS using(USER_ID)
  ) user_orders
where USER_ID=42
group by order_month
;

FullText Search

CREATE EXTENSION IF NOT EXISTS unaccent;  -- required by the unaccent dictionary below

DROP TEXT SEARCH CONFIGURATION IF EXISTS vanilla CASCADE;
CREATE TEXT SEARCH CONFIGURATION vanilla (COPY=pg_catalog.portuguese);
ALTER TEXT SEARCH CONFIGURATION vanilla ALTER MAPPING
    FOR asciiword, asciihword, hword_asciipart, word, hword, hword_part
    WITH unaccent, simple, portuguese_stem;
SET default_text_search_config = 'vanilla';

CREATE TABLE IF NOT EXISTS products (
  id VARCHAR(36) NOT NULL PRIMARY KEY,
  sku VARCHAR(100) NOT NULL,
  name VARCHAR(255) COLLATE "pt-BR-x-icu" NOT NULL CHECK (char_length(name) > 0)
);
CREATE INDEX idx_products_fts ON products USING GIN (to_tsvector('vanilla', name || ' ' || sku));

SELECT * FROM products WHERE to_tsvector('vanilla', name || ' ' || sku) @@ to_tsquery('vanilla', 'livro') LIMIT 10;
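
A possible ranking variant over the same indexed expression (ts_rank sorts matches by relevance):

SELECT id, name, ts_rank(to_tsvector('vanilla', name || ' ' || sku), q) AS rank
FROM products, to_tsquery('vanilla', 'livro') q
WHERE to_tsvector('vanilla', name || ' ' || sku) @@ q
ORDER BY rank DESC
LIMIT 10;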

Management

-- Creating database and user (inside psql, e.g. after: su postgres -c psql)
CREATE USER gustavo WITH PASSWORD 'senha';
CREATE DATABASE third_party_subscription_manager;
GRANT ALL PRIVILEGES ON DATABASE third_party_subscription_manager to gustavo;
ALTER DATABASE third_party_subscription_manager OWNER TO gustavo;
ALTER USER gustavo WITH PASSWORD 'outra senha';

-- Database and table sizes
SELECT pg_database_size('third_party_subscription_manager');
SELECT pg_size_pretty(pg_database_size('third_party_subscription_manager'));
SELECT pg_size_pretty(pg_database_size('third_party_subscription_manager')) As fulldbsize;
SELECT pg_size_pretty(pg_total_relation_size('my_table'));
SELECT
  relname AS "Table",
  pg_size_pretty(pg_total_relation_size(relid)) AS "Size",
  pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS "External Size"
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

-- alter table
alter table subscriptions disable trigger all;
alter table subscriptions enable trigger all;
ALTER TABLE subscriptions ALTER CONSTRAINT subscriptions_user_id_599297d4_fk_users_id DEFERRABLE;

-- sequences
select nextval('django_migrations_id_seq');
ALTER SEQUENCE django_migrations_id_seq RESTART WITH 32;
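
A common companion, resetting a sequence to the current max of its table (assumes the backing table is django_migrations):

SELECT setval('django_migrations_id_seq', (SELECT max(id) FROM django_migrations));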

-- rename database
CREATE DATABASE db;
\c postgres
SELECT * FROM pg_stat_activity WHERE datname = 'db';
SELECT pg_terminate_backend (pid) FROM pg_stat_activity WHERE datname = 'db';
ALTER DATABASE db RENAME TO newdb;

Hardening (quick notes from PGconf 2020)

shared_buffers: 25% of RAM
max_connections: 100 or 200
if the database fits in RAM, set shared_buffers to 75% of RAM
more than 16GB of shared_buffers requires fast disks
with 8-16GB of shared_buffers, set huge_pages=on (it must also be enabled in the Linux kernel: vm.nr_overcommit and vm.nr_hugepages)
use PgBouncer for connection pooling
Postgres workers use RAM for joins and sorting; 128MB of RAM per process is a good starting point
too much RAM per worker can cause out-of-memory (OOM) kills; watch work_mem
maintenance_work_mem = work_mem; use CREATE INDEX CONCURRENTLY
vacuum_cost_limit = 100
vacuum_cost_delay = 0
wal_level = replica
max_wal_size = 16GB
checkpoint_timeout = 60min
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.05
autovacuum_naptime = 1s
autovacuum_max_workers = 10
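
The same notes as a postgresql.conf sketch, assuming a hypothetical dedicated server with 16GB of RAM (starting points to benchmark, not tuned recommendations):

# postgresql.conf fragment for a hypothetical dedicated 16GB-RAM box
shared_buffers = 4GB                 # 25% of RAM
max_connections = 100                # pool with PgBouncer instead of raising this
work_mem = 128MB                     # per sort/hash per process; too high risks OOM
maintenance_work_mem = 128MB         # kept equal to work_mem, per the notes
wal_level = replica
max_wal_size = 16GB
checkpoint_timeout = 60min
vacuum_cost_limit = 100
vacuum_cost_delay = 0
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.05
autovacuum_naptime = 1s
autovacuum_max_workers = 10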
