Skip to content

Instantly share code, notes, and snippets.

@cabecada
Last active March 12, 2024 19:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cabecada/8024d98024559e9fc97ccfcb5324c09f to your computer and use it in GitHub Desktop.
Save cabecada/8024d98024559e9fc97ccfcb5324c09f to your computer and use it in GitHub Desktop.
corruption demo for blogs.
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# drop table t;
DROP TABLE
postgres=# create table t(col1 text, col2 text);
CREATE TABLE
postgres=# insert into t select x::text, x::text from generate_series(1, 10) x;
INSERT 0 10
postgres=# select pg_relation_filepath('t');
pg_relation_filepath
----------------------
base/5/16588
(1 row)
postgres=# checkpoint;
CHECKPOINT
postgres=#
\q
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log stop
waiting for server to shut down.... done
server stopped
postgres@pg:~/udemy/16$ cp db1/base/5/16588 /tmp/16588
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588
*******************************************************************
* PostgreSQL File/Block Formatted Dump Utility
*
* File: db1/base/5/16588
* Options used: -D text,text
*******************************************************************
Block 0 ********************************************************
<Header> -----
Block Offset: 0x00000000 Offsets: Lower 64 (0x0040)
Block: Size 8192 Version 4 Upper 7872 (0x1ec0)
LSN: logid 20 recoff 0xd6a3a000 Special 8192 (0x2000)
Items: 10 Free Space: 7808
Checksum: 0x3cf4 Prune XID: 0x00000000 Flags: 0x0000 ()
Length (including item array): 64
<Data> -----
Item 1 -- Length: 28 Offset: 8160 (0x1fe0) Flags: NORMAL
COPY: 1 1
Item 2 -- Length: 28 Offset: 8128 (0x1fc0) Flags: NORMAL
COPY: 2 2
Item 3 -- Length: 28 Offset: 8096 (0x1fa0) Flags: NORMAL
COPY: 3 3
Item 4 -- Length: 28 Offset: 8064 (0x1f80) Flags: NORMAL
COPY: 4 4
Item 5 -- Length: 28 Offset: 8032 (0x1f60) Flags: NORMAL
COPY: 5 5
Item 6 -- Length: 28 Offset: 8000 (0x1f40) Flags: NORMAL
COPY: 6 6
Item 7 -- Length: 28 Offset: 7968 (0x1f20) Flags: NORMAL
COPY: 7 7
Item 8 -- Length: 28 Offset: 7936 (0x1f00) Flags: NORMAL
COPY: 8 8
Item 9 -- Length: 28 Offset: 7904 (0x1ee0) Flags: NORMAL
COPY: 9 9
Item 10 -- Length: 30 Offset: 7872 (0x1ec0) Flags: NORMAL
COPY: 10 10
*** End of File Encountered. Last Block Read: 0 ***
postgres@pg:~/udemy/16$ #now try corrupting the pages by writing zeros to 28 bytes at offset 7936 bytes on the page
postgres@pg:~/udemy/16$ dd if=/dev/zero seek=7936 bs=1 of=db1/base/5/16588 oflag=seek_bytes conv=notrunc,noerror count=28
28+0 records in
28+0 records out
28 bytes copied, 0.000205796 s, 136 kB/s
#confirm via pg_filedump we cannot read those bytes, but others
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588
*******************************************************************
* PostgreSQL File/Block Formatted Dump Utility
*
* File: db1/base/5/16588
* Options used: -D text,text
*******************************************************************
Block 0 ********************************************************
<Header> -----
Block Offset: 0x00000000 Offsets: Lower 64 (0x0040)
Block: Size 8192 Version 4 Upper 7872 (0x1ec0)
LSN: logid 20 recoff 0xd6a3a000 Special 8192 (0x2000)
Items: 10 Free Space: 7808
Checksum: 0x3cf4 Prune XID: 0x00000000 Flags: 0x0000 ()
Length (including item array): 64
<Data> -----
Item 1 -- Length: 28 Offset: 8160 (0x1fe0) Flags: NORMAL
COPY: 1 1
Item 2 -- Length: 28 Offset: 8128 (0x1fc0) Flags: NORMAL
COPY: 2 2
Item 3 -- Length: 28 Offset: 8096 (0x1fa0) Flags: NORMAL
COPY: 3 3
Item 4 -- Length: 28 Offset: 8064 (0x1f80) Flags: NORMAL
COPY: 4 4
Item 5 -- Length: 28 Offset: 8032 (0x1f60) Flags: NORMAL
COPY: 5 5
Item 6 -- Length: 28 Offset: 8000 (0x1f40) Flags: NORMAL
COPY: 6 6
Item 7 -- Length: 28 Offset: 7968 (0x1f20) Flags: NORMAL
COPY: 7 7
Item 8 -- Length: 28 Offset: 7936 (0x1f00) Flags: NORMAL
Error: unable to decode a tuple, callback #1 returned -1. Partial data:
Item 9 -- Length: 28 Offset: 7904 (0x1ee0) Flags: NORMAL
COPY: 9 9
Item 10 -- Length: 30 Offset: 7872 (0x1ec0) Flags: NORMAL
COPY: 10 10
*** End of File Encountered. Last Block Read: 0 ***
#also check that we fail to query the table due to the same problem
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log start
waiting for server to start.... done
server started
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# select * from t;
WARNING: page verification failed, calculated checksum 24209 but expected 15604
ERROR: invalid page in block 0 of relation base/5/16588
postgres=#
\q
#stop db, for ease of understanding the recovery via two options
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log stop
waiting for server to shut down.... done
server stopped
#one is assuming we figured out which block is corrupt, we can just copy the content that were broken and recover
postgres@pg:~/udemy/16$ dd if=/tmp/16588 skip=7936 iflag=skip_bytes seek=7936 bs=1 of=db1/base/5/16588 oflag=seek_bytes conv=notrunc,noerror count=28
28+0 records in
28+0 records out
28 bytes copied, 0.000138398 s, 202 kB/s
#confirm we can dump the data page
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588
*******************************************************************
* PostgreSQL File/Block Formatted Dump Utility
*
* File: db1/base/5/16588
* Options used: -D text,text
*******************************************************************
Block 0 ********************************************************
<Header> -----
Block Offset: 0x00000000 Offsets: Lower 64 (0x0040)
Block: Size 8192 Version 4 Upper 7872 (0x1ec0)
LSN: logid 20 recoff 0xd6a3a000 Special 8192 (0x2000)
Items: 10 Free Space: 7808
Checksum: 0x3cf4 Prune XID: 0x00000000 Flags: 0x0000 ()
Length (including item array): 64
<Data> -----
Item 1 -- Length: 28 Offset: 8160 (0x1fe0) Flags: NORMAL
COPY: 1 1
Item 2 -- Length: 28 Offset: 8128 (0x1fc0) Flags: NORMAL
COPY: 2 2
Item 3 -- Length: 28 Offset: 8096 (0x1fa0) Flags: NORMAL
COPY: 3 3
Item 4 -- Length: 28 Offset: 8064 (0x1f80) Flags: NORMAL
COPY: 4 4
Item 5 -- Length: 28 Offset: 8032 (0x1f60) Flags: NORMAL
COPY: 5 5
Item 6 -- Length: 28 Offset: 8000 (0x1f40) Flags: NORMAL
COPY: 6 6
Item 7 -- Length: 28 Offset: 7968 (0x1f20) Flags: NORMAL
COPY: 7 7
Item 8 -- Length: 28 Offset: 7936 (0x1f00) Flags: NORMAL
COPY: 8 8
Item 9 -- Length: 28 Offset: 7904 (0x1ee0) Flags: NORMAL
COPY: 9 9
Item 10 -- Length: 30 Offset: 7872 (0x1ec0) Flags: NORMAL
COPY: 10 10
*** End of File Encountered. Last Block Read: 0 ***
#confirm from reading the table too
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log start
waiting for server to start.... done
server started
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# select * from t;
col1 | col2
------+------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
(10 rows)
postgres=#
\q
#option two for corruption test, where we dont have backups but only try to get available data
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log stop
waiting for server to shut down.... done
server stopped
postgres@pg:~/udemy/16$ dd if=/dev/zero seek=7936 bs=1 of=db1/base/5/16588 oflag=seek_bytes conv=notrunc,noerror count=28
28+0 records in
28+0 records out
28 bytes copied, 0.000262996 s, 106 kB/s
#try reading corrupted / zeroed page, error
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588
*******************************************************************
* PostgreSQL File/Block Formatted Dump Utility
*
* File: db1/base/5/16588
* Options used: -D text,text
*******************************************************************
Block 0 ********************************************************
<Header> -----
Block Offset: 0x00000000 Offsets: Lower 64 (0x0040)
Block: Size 8192 Version 4 Upper 7872 (0x1ec0)
LSN: logid 20 recoff 0xd6a60b38 Special 8192 (0x2000)
Items: 10 Free Space: 7808
Checksum: 0x1bb1 Prune XID: 0x00000000 Flags: 0x0000 ()
Length (including item array): 64
<Data> -----
Item 1 -- Length: 28 Offset: 8160 (0x1fe0) Flags: NORMAL
COPY: 1 1
Item 2 -- Length: 28 Offset: 8128 (0x1fc0) Flags: NORMAL
COPY: 2 2
Item 3 -- Length: 28 Offset: 8096 (0x1fa0) Flags: NORMAL
COPY: 3 3
Item 4 -- Length: 28 Offset: 8064 (0x1f80) Flags: NORMAL
COPY: 4 4
Item 5 -- Length: 28 Offset: 8032 (0x1f60) Flags: NORMAL
COPY: 5 5
Item 6 -- Length: 28 Offset: 8000 (0x1f40) Flags: NORMAL
COPY: 6 6
Item 7 -- Length: 28 Offset: 7968 (0x1f20) Flags: NORMAL
COPY: 7 7
Item 8 -- Length: 28 Offset: 7936 (0x1f00) Flags: NORMAL
Error: unable to decode a tuple, callback #1 returned -1. Partial data:
Item 9 -- Length: 28 Offset: 7904 (0x1ee0) Flags: NORMAL
COPY: 9 9
Item 10 -- Length: 30 Offset: 7872 (0x1ec0) Flags: NORMAL
COPY: 10 10
#in this case we copy all the data to the file in a format that we can load as csv. we assume we know the schema
*** End of File Encountered. Last Block Read: 0 ***
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588 | grep COPY | perl -nle 's/COPY://; s/^\s+//; s/\s/,/; print $_'
1,1
2,2
3,3
4,4
5,5
6,6
7,7
9,9
10,10
postgres@pg:~/udemy/16$ #note 8 skipped
postgres@pg:~/udemy/16$ pg_filedump -D text,text db1/base/5/16588 | grep COPY | perl -nle 's/COPY://; s/^\s+//; s/\s/,/; print $_' > /tmp/foo
postgres@pg:~/udemy/16$ cat /tmp/foo
1,1
2,2
3,3
4,4
5,5
6,6
7,7
9,9
10,10
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log start
waiting for server to start.... done
server started
#we cannot query table as corruption, but given zero page skip flag, we can query the table.
#but since the data we have is all on the same page, we lose everything
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# select * from t;
WARNING: page verification failed, calculated checksum 32985 but expected 7089
ERROR: invalid page in block 0 of relation base/5/16588
postgres=# set zero_damaged_pages TO on;
SET
postgres=# select * from t;
WARNING: page verification failed, calculated checksum 32985 but expected 7089
WARNING: invalid page in block 0 of relation base/5/16588; zeroing out page
col1 | col2
------+------
(0 rows)
postgres=# vacuum full t;
VACUUM
#so we vacuum full the table after zero_damaged_pages enabled, and then restore data from earlier copied data from pg_filedump
postgres=# copy t from '/tmp/foo' delimiter ',' CSV; -- restore from filedump
COPY 9
-- note we lose the datapoint 8,8
postgres=# select * from t;
col1 | col2
------+------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
9 | 9
10 | 10
(9 rows)
postgres=#
\q
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log stop
waiting for server to shut down.... done
server stopped
postgres@pg:~/udemy/16$ pg_ctl -D db1 -l db1.log start
waiting for server to start.... done
server started
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# truncate table t;
TRUNCATE TABLE
postgres=#
postgres=# insert into t select x::text, x::text from generate_series(1, 50000000) x;
INSERT 0 50000000
postgres=# select pg_relation_size('t'::regclass)::bigint;
pg_relation_size
------------------
2529943552
(1 row)
postgres=# show block_size;
block_size
------------
8192
(1 row)
postgres=# select 2529943552 / 8192; -- no of blocks
?column?
----------
308831
(1 row)
postgres=# -- now postgres stores datafiles in chunks of 1gb
postgres=# select 1024 * 1024 * 1024 / 8192 ; -- no of blocks in 1gb
?column?
----------
131072
(1 row)
postgres=# select 308831 / 131072 ; -- total number of chunks for the object t
?column?
----------
2
(1 row)
postgres=# -- lets confirm on disk
postgres=#
\q
postgres@pg:~/udemy/16$ psql
psql (16.1 (Ubuntu 16.1-1.pgdg22.04+1))
Type "help" for help.
postgres=# select pg_relation_filepath('t');
pg_relation_filepath
----------------------
base/5/16601
(1 row)
postgres=#
\q
postgres@pg:~/udemy/16$ ls -l db1/base/5/16601* | egrep -v 'fsm|vm'
-rw------- 1 postgres postgres 1073741824 Mar 13 00:38 db1/base/5/16601
-rw------- 1 postgres postgres 1073741824 Mar 13 00:38 db1/base/5/16601.1
-rw------- 1 postgres postgres 382459904 Mar 13 00:39 db1/base/5/16601.2
postgres@pg:~/udemy/16$ dd if=db1/base/5/16601 of=/dev/null bs=8k
131072+0 records in
131072+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 1.37884 s, 779 MB/s
postgres@pg:~/udemy/16$ dd if=db1/base/5/16601.1 of=/dev/null bs=8k
131072+0 records in
131072+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 1.25323 s, 857 MB/s
postgres@pg:~/udemy/16$ dd if=db1/base/5/16601.2 of=/dev/null bs=8k
46687+0 records in
46687+0 records out
382459904 bytes (382 MB, 365 MiB) copied, 0.543619 s, 704 MB/s
postgres@pg:~/udemy/16$ echo 131072 + 131072 + 46687 | bc -l
308831
postgres@pg:~/udemy/16$ #confirmed blocks as above
postgres@pg:~/udemy/16$ echo 1073741824 + 1073741824 + 382459904 | bc -l
2529943552
postgres@pg:~/udemy/16$ #confirmed size as above