Skip to content

Instantly share code, notes, and snippets.

@stalkerg
Last active November 17, 2016 11:46
Show Gist options
  • Save stalkerg/ab833d94e2f64df241f1835651e06e4b to your computer and use it in GitHub Desktop.
Save stalkerg/ab833d94e2f64df241f1835651e06e4b to your computer and use it in GitHub Desktop.
ptrack patch v7 for Postgres 10
--- src/backend/access/heap/Makefile
+++ src/backend/access/heap/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/access/heap
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o ptrack.o
include $(top_srcdir)/src/backend/common.mk
--- src/backend/access/heap/ptrack.c
+++ src/backend/access/heap/ptrack.c
@@ -0,0 +1,566 @@
+#include "postgres.h"
+
+#include "access/heapam_xlog.h"
+#include "access/heapam.h"
+#include "access/ptrack.h"
+#include "access/xlog.h"
+#include "access/xlogutils.h"
+#include "access/skey.h"
+#include "access/genam.h"
+#include "catalog/pg_depend.h"
+#include "access/htup_details.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "utils/inval.h"
+#include "utils/array.h"
+#include "utils/relfilenodemap.h"
+#include <unistd.h>
+#include <sys/stat.h>
+
+/*
+ * Layout of one ptrack map page: one bit per heap block, packed into the
+ * page contents that follow the (max-aligned) page header.
+ */
+
+/* Number of map bytes available on one page (page size minus header) */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of bits allocated for each heap block. */
+#define BITS_PER_HEAPBLOCK 1
+
+/* Number of heap blocks we can represent in one byte. */
+#define HEAPBLOCKS_PER_BYTE 8
+
+/*
+ * Map a heap block number to its position in the ptrack fork:
+ * the map page, the byte within that page's contents, and the bit
+ * within that byte.
+ */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
+
+/* Number of heap blocks covered by one map page */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* One pending "this block was modified" record: a block of a relation file. */
+typedef struct BlockTrack
+{
+ BlockNumber block_number;
+ RelFileNode rel;
+} BlockTrack;
+
+/*
+ * Blocks collected by ptrack_add_block() and not yet written to the ptrack
+ * fork; ptrack_save() consumes them and resets blocks_track_count to zero.
+ */
+static BlockTrack blocks_track[XLR_MAX_BLOCK_ID];
+unsigned int blocks_track_count = 0;
+/* Value of the ptrack_enable setting; tracking is skipped while false. */
+bool ptrack_enable = false;
+
+/* Local helpers and functions defined later in this file */
+static Buffer ptrack_readbuf(RelFileNode rnode, BlockNumber blkno, bool extend);
+static void ptrack_extend(SMgrRelation smgr, BlockNumber nvmblocks);
+static void ptrack_set(BlockNumber heapBlk, Buffer vmBuf);
+void SetPtrackClearLSN(bool set_invalid);
+Datum pg_ptrack_test(PG_FUNCTION_ARGS);
+
+/*
+ * ptrack_add_block
+ *		Remember that block "block_number" of relation file "rel" changed.
+ *
+ * The entry is appended to the blocks_track array; ptrack_save() later
+ * turns the collected entries into bits of the ptrack fork and resets the
+ * counter.
+ *
+ * The array has XLR_MAX_BLOCK_ID slots.  Capacity is verified before the
+ * store, so a full array is never written past its end (the previous
+ * post-increment Assert gave no protection in non-assert builds).  An entry
+ * identical to one already pending is skipped: setting the same bit twice
+ * has no additional effect.
+ */
+void
+ptrack_add_block(BlockNumber block_number, RelFileNode rel)
+{
+	BlockTrack *bt;
+	unsigned int i;
+
+	/* Already pending?  Then there is nothing to add. */
+	for (i = 0; i < blocks_track_count; i++)
+	{
+		bt = &blocks_track[i];
+		if (bt->block_number == block_number &&
+			RelFileNodeEquals(bt->rel, rel))
+			return;
+	}
+
+	if (blocks_track_count >= XLR_MAX_BLOCK_ID)
+		elog(ERROR, "too many blocks tracked by ptrack: %u",
+			 blocks_track_count);
+
+	bt = &blocks_track[blocks_track_count++];
+	bt->block_number = block_number;
+	bt->rel = rel;
+}
+
+/*
+ * ptrack_save
+ *		Flush the pending block list into the ptrack forks.
+ *
+ * For every entry collected by ptrack_add_block(), the map page covering
+ * the block is read (extending the ptrack fork when needed) and the
+ * block's bit is set.  The pinned map buffer is kept across consecutive
+ * entries, but only when the next entry needs the same map page OF THE
+ * SAME RELATION: map block numbers repeat across relations (block 0 of any
+ * two relations maps to map page 0), so comparing the block number alone
+ * would set the bit in the wrong relation's fork.
+ *
+ * The pending counter is reset to zero at the end.
+ *
+ * NOTE(review): END_CRIT_SECTION() calls this function whenever the
+ * critical-section count drops to zero while entries are pending; any
+ * END_CRIT_SECTION() executed by a callee while this loop runs would
+ * re-enter it -- confirm no callee does.
+ */
+void
+ptrack_save(void)
+{
+	Buffer		pbuf = InvalidBuffer;
+	BlockTrack *pinned = NULL;	/* entry for which pbuf was read */
+	unsigned int i;
+
+	for (i = 0; i < blocks_track_count; i++)
+	{
+		BlockTrack *bt = &blocks_track[i];
+		BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(bt->block_number);
+
+		/* Drop the pinned buffer unless it is exactly the page we need */
+		if (BufferIsValid(pbuf) &&
+			(BufferGetBlockNumber(pbuf) != mapBlock ||
+			 !RelFileNodeEquals(pinned->rel, bt->rel)))
+		{
+			ReleaseBuffer(pbuf);
+			pbuf = InvalidBuffer;
+		}
+
+		if (!BufferIsValid(pbuf))
+		{
+			pbuf = ptrack_readbuf(bt->rel, mapBlock, true);
+			pinned = bt;
+		}
+
+		ptrack_set(bt->block_number, pbuf);
+	}
+
+	if (BufferIsValid(pbuf))
+		ReleaseBuffer(pbuf);
+
+	blocks_track_count = 0;
+}
+
+/*
+ * ptrack_set
+ *		Set the ptrack bit of heap block "heapBlk" in map buffer "vmBuf".
+ *
+ * vmBuf must be a pinned buffer holding the map page that covers heapBlk;
+ * anything else raises an ERROR.  The bit is first examined under a share
+ * lock; only when it is still clear is the lock traded for an exclusive
+ * one, the bit re-checked, set, and the buffer marked dirty.  The buffer is
+ * unlocked (but stays pinned) on return.
+ */
+void
+ptrack_set(BlockNumber heapBlk, Buffer vmBuf)
+{
+	BlockNumber wantedBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+	uint32		byteOff = HEAPBLK_TO_MAPBYTE(heapBlk);
+	uint8		bitOff = HEAPBLK_TO_MAPBIT(heapBlk);
+	char	   *bits;
+
+	/* Refuse a buffer that is not the map page for this heap block */
+	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != wantedBlock)
+		elog(ERROR, "wrong VM buffer passed to ptrack_set");
+
+	bits = PageGetContents(BufferGetPage(vmBuf));
+
+	LockBuffer(vmBuf, BUFFER_LOCK_SHARE);
+	if ((bits[byteOff] & (1 << bitOff)) == 0)
+	{
+		/* Bit is clear: give up the share lock and retry exclusively */
+		LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
+		LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
+
+		/* Somebody may have set it while the page was unlocked */
+		if ((bits[byteOff] & (1 << bitOff)) == 0)
+		{
+			START_CRIT_SECTION();
+			bits[byteOff] |= (1 << bitOff);
+			MarkBufferDirty(vmBuf);
+			/* plain exit: must not trigger ptrack_save() again */
+			END_CRIT_SECTION_WITHOUT_TRACK();
+		}
+	}
+	LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * ptrack_readbuf
+ *		Return a pinned buffer for page "blkno" of the ptrack fork of "rnode".
+ *
+ * When the page lies beyond the current end of the fork, the fork is
+ * extended if "extend" is true; otherwise InvalidBuffer is returned.  A
+ * page that comes back all-zero ("new") is initialized before returning.
+ */
+static Buffer
+ptrack_readbuf(RelFileNode rnode, BlockNumber blkno, bool extend)
+{
+	SMgrRelation reln = smgropen(rnode, InvalidBackendId);
+	Buffer		result;
+	Page		contents;
+
+	/* Fill in the cached fork size the first time it is needed */
+	if (reln->smgr_ptrack_nblocks == InvalidBlockNumber)
+	{
+		if (!smgrexists(reln, PAGESTRACK_FORKNUM))
+			reln->smgr_ptrack_nblocks = 0;
+		else
+			reln->smgr_ptrack_nblocks = smgrnblocks(reln,
+													PAGESTRACK_FORKNUM);
+	}
+
+	/* Request past the end of the fork: grow it or give up */
+	if (blkno >= reln->smgr_ptrack_nblocks)
+	{
+		if (!extend)
+			return InvalidBuffer;
+		ptrack_extend(reln, blkno + 1);
+	}
+
+	/*
+	 * Read in ZERO_ON_ERROR mode so that a damaged page comes back zeroed
+	 * instead of raising an error.
+	 * NOTE(review): a zeroed page has every bit clear -- confirm that
+	 * dropping tracked bits this way is acceptable for ptrack.
+	 */
+	result = ReadBufferWithoutRelcache2(reln, PAGESTRACK_FORKNUM, blkno,
+										RBM_ZERO_ON_ERROR, NULL);
+
+	contents = BufferGetPage(result);
+	if (PageIsNew(contents))
+		PageInit(contents, BLCKSZ, 0);
+
+	return result;
+}
+
+/*
+ * ptrack_extend
+ *		Make sure the ptrack fork of "smgr" has at least vm_nblocks pages.
+ *
+ * Creates the fork if it does not exist, appends initialized empty pages
+ * until the requested size is reached, broadcasts an smgr invalidation and
+ * stores the new size in smgr->smgr_ptrack_nblocks.  The whole operation
+ * runs under the smgr extension lock.
+ */
+static void
+ptrack_extend(SMgrRelation smgr, BlockNumber vm_nblocks)
+{
+ BlockNumber vm_nblocks_now;
+ Page pg;
+
+ /* Template page written for every block that gets appended */
+ pg = (Page) palloc(BLCKSZ);
+ PageInit(pg, BLCKSZ, 0);
+
+ LockSmgrForExtension(smgr, ExclusiveLock);
+ /*
+ * Create the file first if it doesn't exist. If smgr_ptrack_nblocks is
+ * positive then it must exist, no need for an smgrexists call.
+ */
+ if ((smgr->smgr_ptrack_nblocks == 0 ||
+ smgr->smgr_ptrack_nblocks == InvalidBlockNumber) &&
+ !smgrexists(smgr, PAGESTRACK_FORKNUM))
+ smgrcreate(smgr, PAGESTRACK_FORKNUM, false);
+
+ /* Re-read the real size now that the extension lock is held */
+ vm_nblocks_now = smgrnblocks(smgr, PAGESTRACK_FORKNUM);
+
+ /* Now extend the file */
+ while (vm_nblocks_now < vm_nblocks)
+ {
+ /* the checksum is computed per block number, so redo it each time */
+ PageSetChecksumInplace(pg, vm_nblocks_now);
+ smgrextend(smgr, PAGESTRACK_FORKNUM, vm_nblocks_now,
+ (char *) pg, false);
+ vm_nblocks_now++;
+ }
+ /*
+ * Send a shared-inval message to force other backends to close any smgr
+ * references they may have for this rel, which we are about to change.
+ * This is a useful optimization because it means that backends don't have
+ * to keep checking for creation or extension of the file, which happens
+ * infrequently.
+ */
+ CacheInvalidateSmgr(smgr->smgr_rnode);
+ /* Update local cache with the up-to-date size */
+ smgr->smgr_ptrack_nblocks = vm_nblocks_now;
+
+ pfree(pg);
+
+ UnlockSmgrForExtension(smgr, ExclusiveLock);
+}
+
+/*
+ * ptrack_clear
+ *		Zero the ptrack map of every relation listed in pg_class.
+ *
+ * Scans pg_class, and for each relation that can be opened zeroes all
+ * pages of its ptrack fork while holding the smgr extension lock.
+ * Afterwards the current WAL insert position is stored in the ptrack
+ * control file via SetPtrackClearLSN(false).
+ *
+ * A relation that RelationIdGetRelation() cannot open (it returns
+ * InvalidRelation) is skipped rather than dereferenced.
+ */
+void
+ptrack_clear(void)
+{
+	HeapTuple	tuple;
+	Relation	catalog = heap_open(RelationRelationId, AccessShareLock);
+	SysScanDesc scan = systable_beginscan(catalog, InvalidOid, false, NULL, 0, NULL);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		BlockNumber nblock;
+		BlockNumber nblocks;
+		Relation	rel = RelationIdGetRelation(HeapTupleGetOid(tuple));
+
+		/* Nothing to clear when the relation cannot be opened */
+		if (rel == InvalidRelation)
+			continue;
+
+		RelationOpenSmgr(rel);
+		if (rel->rd_smgr == NULL)
+			goto end_rel;
+
+		LockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+
+		/* Fill in the cached fork size if it is not known yet */
+		if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+		{
+			if (smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM))
+				rel->rd_smgr->smgr_ptrack_nblocks = smgrnblocks(rel->rd_smgr,
+																PAGESTRACK_FORKNUM);
+			else
+				rel->rd_smgr->smgr_ptrack_nblocks = 0;
+		}
+
+		/*
+		 * The fork cannot grow while the extension lock is held
+		 * (ptrack_extend takes the same lock), so read the bound once.
+		 */
+		nblocks = rel->rd_smgr->smgr_ptrack_nblocks;
+
+		for (nblock = 0; nblock < nblocks; nblock++)
+		{
+			Buffer		buf = ReadBufferExtended(rel, PAGESTRACK_FORKNUM,
+												 nblock, RBM_ZERO_ON_ERROR, NULL);
+			Page		page = BufferGetPage(buf);
+			char	   *map = PageGetContents(page);
+
+			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+			START_CRIT_SECTION();
+			MemSet(map, 0, MAPSIZE);
+			MarkBufferDirty(buf);
+			/* plain exit: clearing must not trigger ptrack_save() */
+			END_CRIT_SECTION_WITHOUT_TRACK();
+			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+			ReleaseBuffer(buf);
+		}
+
+		UnlockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+end_rel:
+		RelationClose(rel);
+	}
+
+	systable_endscan(scan);
+	heap_close(catalog, AccessShareLock);
+
+	SetPtrackClearLSN(false);
+}
+
+/*
+ * ptrack_get_and_clear
+ *		Return the ptrack map of one relation as a bytea and clear it.
+ *
+ * tablespace_oid / table_oid are handed to RelidByRelfilenode(), i.e.
+ * "table_oid" is used as the relation's relfilenode, not its pg_class OID.
+ *
+ * The result is the concatenation of the contents (MAPSIZE bytes each) of
+ * all ptrack fork pages; every page is zeroed after being copied.  An empty
+ * bytea is returned when table_oid is invalid, the relation cannot be
+ * found, or it has no ptrack pages.  Whenever the relation was opened, the
+ * current WAL insert position is recorded via SetPtrackClearLSN(false).
+ *
+ * The arguments are validated before the relation is looked up, so the
+ * early exit never leaves an opened relation behind.
+ */
+bytea *
+ptrack_get_and_clear(Oid tablespace_oid, Oid table_oid)
+{
+	bytea	   *result = NULL;
+	BlockNumber nblock;
+	BlockNumber nblocks;
+	Relation	rel;
+
+	if (table_oid == InvalidOid)
+	{
+		elog(WARNING, "InvalidOid");
+		goto full_end;
+	}
+
+	rel = RelationIdGetRelation(RelidByRelfilenode(tablespace_oid,
+													table_oid));
+	if (rel == InvalidRelation)
+	{
+		elog(WARNING, "InvalidRelation");
+		goto full_end;
+	}
+
+	RelationOpenSmgr(rel);
+	if (rel->rd_smgr == NULL)
+		goto end_rel;
+
+	LockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+
+	/* Fill in the cached fork size if it is not known yet */
+	if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+	{
+		if (smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM))
+			rel->rd_smgr->smgr_ptrack_nblocks = smgrnblocks(rel->rd_smgr,
+															PAGESTRACK_FORKNUM);
+		else
+			rel->rd_smgr->smgr_ptrack_nblocks = 0;
+	}
+
+	/*
+	 * The fork cannot grow while the extension lock is held (ptrack_extend
+	 * takes the same lock), so the size is read once and used for both the
+	 * allocation and the copy loop.
+	 */
+	nblocks = rel->rd_smgr->smgr_ptrack_nblocks;
+	if (nblocks == 0)
+	{
+		UnlockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+		goto end_rel;
+	}
+
+	result = (bytea *) palloc(nblocks * MAPSIZE + VARHDRSZ);
+	SET_VARSIZE(result, nblocks * MAPSIZE + VARHDRSZ);
+
+	for (nblock = 0; nblock < nblocks; nblock++)
+	{
+		Buffer		buf = ReadBufferExtended(rel, PAGESTRACK_FORKNUM,
+											 nblock, RBM_ZERO_ON_ERROR, NULL);
+		Page		page = BufferGetPage(buf);
+		char	   *map = PageGetContents(page);
+
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+		START_CRIT_SECTION();
+		/* copy out first, then clear, under the same exclusive lock */
+		memcpy(VARDATA(result) + nblock * MAPSIZE, map, MAPSIZE);
+		MemSet(map, 0, MAPSIZE);
+		MarkBufferDirty(buf);
+		END_CRIT_SECTION_WITHOUT_TRACK();
+		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+		ReleaseBuffer(buf);
+	}
+
+	UnlockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+end_rel:
+	RelationClose(rel);
+
+	SetPtrackClearLSN(false);
+full_end:
+	/* Callers always get a valid (possibly empty) bytea */
+	if (result == NULL)
+	{
+		result = palloc0(VARHDRSZ);
+		SET_VARSIZE(result, VARHDRSZ);
+	}
+
+	return result;
+}
+
+/*
+ * SetPtrackClearLSN
+ *		Store an LSN at the start of the ptrack control file.
+ *
+ * With set_invalid true the stored value is InvalidXLogRecPtr; otherwise it
+ * is the current WAL insert position.  The file "global/ptrack_control"
+ * under DataDir is created if missing, written, fsync'ed and closed; a
+ * failure in any of these steps is a PANIC.
+ */
+void
+SetPtrackClearLSN(bool set_invalid)
+{
+	XLogRecPtr	lsn = set_invalid ? InvalidXLogRecPtr : GetXLogInsertRecPtr();
+	char		control_path[MAXPGPATH];
+	int			ctl_fd;
+
+	join_path_components(control_path, DataDir, "global/ptrack_control");
+	canonicalize_path(control_path);
+
+	ctl_fd = BasicOpenFile(control_path,
+						   O_RDWR | O_CREAT | PG_BINARY,
+						   S_IRUSR | S_IWUSR);
+	if (ctl_fd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not create ptrack control file \"%s\": %m",
+						"global/ptrack_control")));
+
+	errno = 0;
+	if (write(ctl_fd, &lsn, sizeof(XLogRecPtr)) != sizeof(XLogRecPtr))
+	{
+		/* a short write that left errno untouched is reported as ENOSPC */
+		if (errno == 0)
+			errno = ENOSPC;
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not write to ptrack control file: %m")));
+	}
+
+	if (pg_fsync(ctl_fd) != 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync ptrack control file: %m")));
+
+	if (close(ctl_fd) != 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close ptrack control file: %m")));
+}
+
+/*
+ * pg_ptrack_test
+ *		SQL-callable consistency check of one relation's ptrack map.
+ *
+ * For every block of the relation's main fork the page LSN is compared
+ * with the LSN stored in global/ptrack_control and with the block's bit in
+ * the ptrack fork:
+ *	- bit set, but page LSN not newer than the control LSN: "excess";
+ *	- bit clear (or map page missing), but page LSN newer: "necessary",
+ *	  i.e. a change that was not tracked.
+ * Every finding is reported as a WARNING.  Returns a two-element array
+ * { excess count, necessary count }.
+ *
+ * Requires superuser or a replication role.
+ *
+ * Fixes relative to the previous version: a block whose map page is
+ * missing no longer falls through to dereference an invalid buffer; the
+ * smgr extension lock taken for the scan is released afterwards; the
+ * WARNING formats use %u instead of "%ud".
+ */
+Datum
+pg_ptrack_test(PG_FUNCTION_ARGS)
+{
+	Oid			relation_oid = PG_GETARG_OID(0);
+	BlockNumber nblock,
+				num_blocks;
+	Relation	rel;
+	XLogRecPtr	ptrack_control_lsn;
+	Buffer		ptrack_buf = InvalidBuffer;
+	Page		page;
+	char	   *map;
+	int			fd;
+	unsigned int excess_data_counter = 0;
+	unsigned int necessary_data_counter = 0;
+	ArrayType  *result_array;
+	Datum		result_elems[2];
+
+	if (!superuser() && !has_rolreplication(GetUserId()))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+
+	/* get LSN from ptrack_control file */
+	fd = BasicOpenFile("global/ptrack_control",
+					   O_RDONLY | PG_BINARY,
+					   0);
+
+	if (fd < 0)
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not open ptrack control file \"%s\": %m",
+						"global/ptrack_control")));
+	errno = 0;
+	if (read(fd, &ptrack_control_lsn, sizeof(XLogRecPtr)) != sizeof(XLogRecPtr))
+	{
+		/*
+		 * A short read leaves errno at 0; ENOSPC is substituted so that %m
+		 * prints something.  NOTE(review): the errno choice mirrors the
+		 * write path and is misleading for a read.
+		 */
+		if (errno == 0)
+			errno = ENOSPC;
+
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not read to ptrack control file: %m")));
+	}
+
+	if (close(fd))
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close ptrack control file: %m")));
+
+	rel = RelationIdGetRelation(relation_oid);
+	if (rel == InvalidRelation)
+	{
+		elog(WARNING, "Relation not found.");
+		goto end_return;
+	}
+	LockRelationOid(relation_oid, AccessShareLock);
+	RelationOpenSmgr(rel);
+	if (rel->rd_smgr == NULL)
+		goto end_rel;
+
+	/* Keep the ptrack fork from being extended while it is inspected */
+	LockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+
+	num_blocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
+	if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+	{
+		if (smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM))
+			rel->rd_smgr->smgr_ptrack_nblocks = smgrnblocks(rel->rd_smgr, PAGESTRACK_FORKNUM);
+		else
+			rel->rd_smgr->smgr_ptrack_nblocks = 0;
+	}
+
+	for (nblock = 0; nblock < num_blocks; nblock++)
+	{
+		Buffer		main_buf = ReadBufferExtended(rel,
+												  MAIN_FORKNUM,
+												  nblock,
+												  RBM_ZERO_ON_ERROR,
+												  NULL);
+		Page		main_page;
+		XLogRecPtr	main_page_lsn;
+
+		BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(nblock);
+		uint32		mapByte = HEAPBLK_TO_MAPBYTE(nblock);
+		uint8		mapBit = HEAPBLK_TO_MAPBIT(nblock);
+
+		/* Get page lsn */
+		LockBuffer(main_buf, BUFFER_LOCK_SHARE);
+		main_page = BufferGetPage(main_buf);
+		main_page_lsn = PageGetLSN(main_page);
+		LockBuffer(main_buf, BUFFER_LOCK_UNLOCK);
+		ReleaseBuffer(main_buf);
+
+		/* Reuse the old pinned buffer if possible */
+		if (BufferIsValid(ptrack_buf) &&
+			BufferGetBlockNumber(ptrack_buf) != mapBlock)
+		{
+			ReleaseBuffer(ptrack_buf);
+			ptrack_buf = InvalidBuffer;
+		}
+		if (!BufferIsValid(ptrack_buf))
+			ptrack_buf = ptrack_readbuf(rel->rd_node, mapBlock, false);
+
+		if (ptrack_buf == InvalidBuffer)
+		{
+			/* no map page at all: the block is untracked by definition */
+			if (ptrack_control_lsn < main_page_lsn)
+			{
+				necessary_data_counter++;
+				elog(WARNING, "Block %u not track. Ptrack lsn:%X/%X page lsn:%X/%X",
+					 nblock,
+					 (uint32) (ptrack_control_lsn >> 32),
+					 (uint32) ptrack_control_lsn,
+					 (uint32) (main_page_lsn >> 32),
+					 (uint32) main_page_lsn);
+			}
+			/* there is no map page to inspect, whatever the verdict */
+			continue;
+		}
+
+		page = BufferGetPage(ptrack_buf);
+		map = PageGetContents(page);
+		LockBuffer(ptrack_buf, BUFFER_LOCK_SHARE);
+		if (map[mapByte] & (1 << mapBit))
+		{
+			/* excess data */
+			if (ptrack_control_lsn >= main_page_lsn)
+			{
+				excess_data_counter++;
+				elog(WARNING, "Block %u not needed. Ptrack lsn:%X/%X page lsn:%X/%X",
+					 nblock,
+					 (uint32) (ptrack_control_lsn >> 32),
+					 (uint32) ptrack_control_lsn,
+					 (uint32) (main_page_lsn >> 32),
+					 (uint32) main_page_lsn);
+			}
+		}
+		/* not tracked data */
+		else if (ptrack_control_lsn < main_page_lsn)
+		{
+			necessary_data_counter++;
+			elog(WARNING, "Block %u not tracked. Ptrack lsn:%X/%X page lsn:%X/%X",
+				 nblock,
+				 (uint32) (ptrack_control_lsn >> 32),
+				 (uint32) ptrack_control_lsn,
+				 (uint32) (main_page_lsn >> 32),
+				 (uint32) main_page_lsn);
+		}
+		LockBuffer(ptrack_buf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/* Pairs with the LockSmgrForExtension() above */
+	UnlockSmgrForExtension(rel->rd_smgr, ExclusiveLock);
+
+end_rel:
+	if (ptrack_buf != InvalidBuffer)
+		ReleaseBuffer(ptrack_buf);
+	RelationClose(rel);
+	UnlockRelationOid(relation_oid, AccessShareLock);
+
+end_return:
+	result_elems[0] = UInt32GetDatum(excess_data_counter);
+	result_elems[1] = UInt32GetDatum(necessary_data_counter);
+	result_array = construct_array(result_elems, 2, 23, 4, true, 'i');
+	PG_RETURN_ARRAYTYPE_P(result_array);
+}
+
+/*
+ * assign_ptrack_enable
+ *		Assign hook of the ptrack_enable setting.
+ *
+ * When the setting is turned off (and a data directory is known and we are
+ * not in bootstrap mode) the ptrack control file is overwritten with an
+ * invalid LSN.  "extra" is unused.
+ */
+void
+assign_ptrack_enable(bool newval, void *extra)
+{
+	if (DataDir == NULL)
+		return;
+	if (IsBootstrapProcessingMode())
+		return;
+	if (newval)
+		return;
+
+	SetPtrackClearLSN(true);
+}
+
--- src/backend/access/transam/xlog.c
+++ src/backend/access/transam/xlog.c
@@ -6794,6 +6794,7 @@ StartupXLOG(void)
do
{
bool switchedTLI = false;
+ int nblock;
#ifdef WAL_DEBUG
if (XLOG_DEBUG ||
@@ -6947,6 +6948,17 @@ StartupXLOG(void)
/* Pop the error context stack */
error_context_stack = errcallback.previous;
+				/*
+				 * Mark every main-fork block referenced by the replayed
+				 * record in the ptrack map.  max_block_id is the highest
+				 * block id used by the record (-1 if none, see
+				 * xlogreader.h), so the loop bound is inclusive; ids that
+				 * the record does not use are skipped.
+				 */
+				if (ptrack_enable)
+				{
+					for (nblock = 0; nblock <= xlogreader->max_block_id; nblock++)
+					{
+						if (!xlogreader->blocks[nblock].in_use)
+							continue;
+						if (xlogreader->blocks[nblock].forknum != MAIN_FORKNUM)
+							continue;
+
+						ptrack_add_block(xlogreader->blocks[nblock].blkno,
+										 xlogreader->blocks[nblock].rnode);
+						ptrack_save();
+					}
+				}
+
/*
* Update lastReplayedEndRecPtr after this record has been
* successfully replayed.
--- src/backend/access/transam/xlogfuncs.c
+++ src/backend/access/transam/xlogfuncs.c
@@ -168,7 +168,6 @@ pg_stop_backup(PG_FUNCTION_ARGS)
PG_RETURN_LSN(stoppoint);
}
-
/*
* pg_stop_backup_v2: finish taking exclusive or nonexclusive on-line backup.
*
@@ -276,6 +275,30 @@ pg_stop_backup_v2(PG_FUNCTION_ARGS)
return (Datum) 0;
}
+/*
+ * pg_ptrack_clear: SQL-callable wrapper around ptrack_clear().
+ *
+ * Only superusers and replication roles may call it.
+ */
+Datum
+pg_ptrack_clear(PG_FUNCTION_ARGS)
+{
+	bool		allowed = superuser() || has_rolreplication(GetUserId());
+
+	if (!allowed)
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+
+	ptrack_clear();
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * pg_ptrack_get_and_clear: SQL-callable wrapper around
+ * ptrack_get_and_clear(); passes both OID arguments through and returns
+ * the resulting bytea.
+ *
+ * Only superusers and replication roles may call it.
+ */
+Datum
+pg_ptrack_get_and_clear(PG_FUNCTION_ARGS)
+{
+	bytea	   *ptrack_map;
+
+	if (!(superuser() || has_rolreplication(GetUserId())))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+
+	ptrack_map = ptrack_get_and_clear(PG_GETARG_OID(0), PG_GETARG_OID(1));
+
+	PG_RETURN_BYTEA_P(ptrack_map);
+}
+
/*
* pg_switch_xlog: switch to next xlog file
*
--- src/backend/access/transam/xloginsert.c
+++ src/backend/access/transam/xloginsert.c
@@ -23,6 +23,7 @@
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/pg_control.h"
#include "common/pg_lzcompress.h"
#include "miscadmin.h"
@@ -256,6 +257,10 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
#endif
regbuf->in_use = true;
+ if (ptrack_enable && regbuf->forkno == MAIN_FORKNUM)
+ {
+ ptrack_add_block(regbuf->block, regbuf->rnode);
+ }
}
/*
--- src/backend/catalog/storage.c
+++ src/backend/catalog/storage.c
@@ -238,6 +238,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
+ rel->rd_smgr->smgr_ptrack_nblocks = InvalidBlockNumber;
/* Truncate the FSM first if it exists */
fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
--- src/backend/storage/buffer/bufmgr.c
+++ src/backend/storage/buffer/bufmgr.c
@@ -693,6 +693,17 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
mode, strategy, &hit);
}
+/*
+ * ReadBufferWithoutRelcache2 -- like ReadBufferWithoutRelcache, but takes
+ *		an already opened SMgrRelation instead of a RelFileNode.
+ *
+ * The read is always issued with RELPERSISTENCE_PERMANENT.
+ */
+Buffer
+ReadBufferWithoutRelcache2(SMgrRelation smgr, ForkNumber forkNum,
+						   BlockNumber blockNum, ReadBufferMode mode,
+						   BufferAccessStrategy strategy)
+{
+	bool		hit_unused;		/* output of ReadBuffer_common, ignored */
+	Buffer		buffer;
+
+	buffer = ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum,
+							   blockNum, mode, strategy, &hit_unused);
+
+	return buffer;
+}
+
/*
* ReadBuffer_common -- common logic for all ReadBuffer variants
--- src/backend/storage/lmgr/lmgr.c
+++ src/backend/storage/lmgr/lmgr.c
@@ -390,6 +390,30 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
LockRelease(&tag, lockmode, false);
}
+/*
+ *		LockSmgrForExtension
+ *
+ * Acquire a LOCKTAG_SMGR lock for the given smgr relation in the requested
+ * mode, waiting if necessary.  The lock is identified by the relation's
+ * relfilenode number only.
+ */
+void
+LockSmgrForExtension(SMgrRelation smgr, LOCKMODE lockmode)
+{
+	Oid			relnode = smgr->smgr_rnode.node.relNode;
+	LOCKTAG		smgr_tag;
+
+	SET_LOCKTAG_SMGR(smgr_tag, relnode);
+
+	(void) LockAcquire(&smgr_tag, lockmode, false, false);
+}
+
+/*
+ *		UnlockSmgrForExtension
+ *
+ * Release a lock previously taken with LockSmgrForExtension() on the same
+ * smgr relation and in the same lock mode.
+ */
+void
+UnlockSmgrForExtension(SMgrRelation smgr, LOCKMODE lockmode)
+{
+	Oid			relnode = smgr->smgr_rnode.node.relNode;
+	LOCKTAG		smgr_tag;
+
+	SET_LOCKTAG_SMGR(smgr_tag, relnode);
+
+	LockRelease(&smgr_tag, lockmode, false);
+}
+
+
/*
* LockPage
*
@@ -1022,6 +1046,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field3,
tag->locktag_field4);
break;
+ case LOCKTAG_SMGR:
+ appendStringInfo(buf,
+ _("smgr lock %u"),
+ tag->locktag_field1);
+ break;
default:
appendStringInfo(buf,
_("unrecognized locktag type %d"),
--- src/backend/storage/smgr/smgr.c
+++ src/backend/storage/smgr/smgr.c
@@ -170,6 +170,7 @@ smgropen(RelFileNode rnode, BackendId backend)
reln->smgr_targblock = InvalidBlockNumber;
reln->smgr_fsm_nblocks = InvalidBlockNumber;
reln->smgr_vm_nblocks = InvalidBlockNumber;
+ reln->smgr_ptrack_nblocks = InvalidBlockNumber;
reln->smgr_which = 0; /* we only have md.c at present */
/* mark it not open */
--- src/backend/utils/init/postinit.c
+++ src/backend/utils/init/postinit.c
@@ -24,6 +24,7 @@
#include "access/sysattr.h"
#include "access/xact.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
@@ -565,6 +566,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
elog(DEBUG3, "InitPostgres");
+ assign_ptrack_enable(ptrack_enable, NULL);
+
/*
* Add my PGPROC struct to the ProcArray.
*
--- src/backend/utils/misc/guc.c
+++ src/backend/utils/misc/guc.c
@@ -31,6 +31,7 @@
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
+#include "access/ptrack.h"
#include "catalog/namespace.h"
#include "commands/async.h"
#include "commands/prepare.h"
@@ -1001,6 +1002,16 @@ static struct config_bool ConfigureNamesBool[] =
},
{
+ {"ptrack_enable", PGC_SIGHUP, WAL_SETTINGS,
+ gettext_noop("Enable page tracking."),
+ NULL
+ },
+ &ptrack_enable,
+ false,
+ NULL, &assign_ptrack_enable, NULL
+ },
+
+ {
{"wal_compression", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Compresses full-page writes written in WAL file."),
NULL
--- src/common/relpath.c
+++ src/common/relpath.c
@@ -35,7 +35,8 @@ const char *const forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
"vm", /* VISIBILITYMAP_FORKNUM */
- "init" /* INIT_FORKNUM */
+ "init", /* INIT_FORKNUM */
+ "ptrack" /* PAGESTRACK_FORKNUM */
};
/*
--- src/include/access/ptrack.h
+++ src/include/access/ptrack.h
@@ -0,0 +1,23 @@
+#ifndef PTRACK_H
+#define PTRACK_H
+
+#include "access/xlogdefs.h"
+#include "storage/block.h"
+#include "storage/buf.h"
+#include "storage/relfilenode.h"
+#include "utils/relcache.h"
+
+extern PGDLLIMPORT unsigned int blocks_track_count;
+
+extern PGDLLIMPORT bool ptrack_enable;
+
+extern void ptrack_save(void);
+
+extern void ptrack_add_block(BlockNumber block_number, RelFileNode rel);
+
+extern void ptrack_clear(void);
+extern bytea *ptrack_get_and_clear(Oid tablespace_oid, Oid table_oid);
+extern void assign_ptrack_enable(bool newval, void *extra);
+
+#endif /* PTRACK_H */
+
--- src/include/access/xlog_fn.h
+++ src/include/access/xlog_fn.h
@@ -16,6 +16,8 @@
extern Datum pg_start_backup(PG_FUNCTION_ARGS);
extern Datum pg_stop_backup(PG_FUNCTION_ARGS);
extern Datum pg_stop_backup_v2(PG_FUNCTION_ARGS);
+extern Datum pg_ptrack_clear(PG_FUNCTION_ARGS);
+extern Datum pg_ptrack_get_and_clear(PG_FUNCTION_ARGS);
extern Datum pg_switch_xlog(PG_FUNCTION_ARGS);
extern Datum pg_create_restore_point(PG_FUNCTION_ARGS);
extern Datum pg_current_xlog_location(PG_FUNCTION_ARGS);
@@ -33,5 +35,6 @@ extern Datum pg_is_xlog_replay_paused(PG_FUNCTION_ARGS);
extern Datum pg_xlog_location_diff(PG_FUNCTION_ARGS);
extern Datum pg_is_in_backup(PG_FUNCTION_ARGS);
extern Datum pg_backup_start_time(PG_FUNCTION_ARGS);
+extern Datum pg_ptrack_test(PG_FUNCTION_ARGS);
#endif /* XLOG_FN_H */
--- src/include/catalog/pg_proc.h
+++ src/include/catalog/pg_proc.h
@@ -3154,6 +3154,12 @@ DATA(insert OID = 2850 ( pg_xlogfile_name_offset PGNSP PGUID 12 1 0 0 0 f f f f
DESCR("xlog filename and byte offset, given an xlog location");
DATA(insert OID = 2851 ( pg_xlogfile_name PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "3220" _null_ _null_ _null_ _null_ _null_ pg_xlogfile_name _null_ _null_ _null_ ));
DESCR("xlog filename, given an xlog location");
+DATA(insert OID = 6016 ( pg_ptrack_clear PGNSP PGUID 12 1 0 0 0 f f f f t f v u 0 0 2278 "" _null_ _null_ _null_ _null_ _null_ pg_ptrack_clear _null_ _null_ _null_ ));
+DESCR("clear ptrack fork files");
+DATA(insert OID = 6017 ( pg_ptrack_test PGNSP PGUID 12 1 0 0 0 f f f f t f v u 1 0 1007 "26" _null_ _null_ _null_ _null_ _null_ pg_ptrack_test _null_ _null_ _null_ ));
+DESCR("test ptrack fork relation");
+DATA(insert OID = 6018 ( pg_ptrack_get_and_clear PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 17 "26 26" _null_ _null_ _null_ _null_ _null_ pg_ptrack_get_and_clear _null_ _null_ _null_ ));
+DESCR("get ptrack file as bytea and clear him");
DATA(insert OID = 3165 ( pg_xlog_location_diff PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1700 "3220 3220" _null_ _null_ _null_ _null_ _null_ pg_xlog_location_diff _null_ _null_ _null_ ));
DESCR("difference in bytes, given two xlog locations");
--- src/include/common/relpath.h
+++ src/include/common/relpath.h
@@ -27,7 +27,8 @@ typedef enum ForkNumber
MAIN_FORKNUM = 0,
FSM_FORKNUM,
VISIBILITYMAP_FORKNUM,
- INIT_FORKNUM
+ INIT_FORKNUM,
+ PAGESTRACK_FORKNUM
/*
* NOTE: if you add a new fork, change MAX_FORKNUM and possibly
@@ -36,9 +37,9 @@ typedef enum ForkNumber
*/
} ForkNumber;
-#define MAX_FORKNUM INIT_FORKNUM
+#define MAX_FORKNUM PAGESTRACK_FORKNUM
-#define FORKNAMECHARS 4 /* max chars for a fork name */
+#define FORKNAMECHARS 6 /* max chars for a fork name */
extern const char *const forkNames[];
--- src/include/miscadmin.h
+++ src/include/miscadmin.h
@@ -24,6 +24,7 @@
#define MISCADMIN_H
#include "pgtime.h" /* for pg_time_t */
+#include "access/ptrack.h"
#define PG_BACKEND_VERSIONSTR "postgres (PostgreSQL) " PG_VERSION "\n"
@@ -133,6 +134,14 @@ do { \
do { \
Assert(CritSectionCount > 0); \
CritSectionCount--; \
+ if (!CritSectionCount && ptrack_enable && blocks_track_count > 0) \
+ ptrack_save(); \
+} while(0)
+
+/*
+ * END_CRIT_SECTION_WITHOUT_TRACK
+ * Leave a critical section by decrementing CritSectionCount only, without
+ * the ptrack_save() call that END_CRIT_SECTION() makes when the count
+ * reaches zero.  The ptrack code uses it for its own map page updates.
+ */
+#define END_CRIT_SECTION_WITHOUT_TRACK() \
+do { \
+ Assert(CritSectionCount > 0); \
+ CritSectionCount--; \
} while(0)
--- src/include/storage/bufmgr.h
+++ src/include/storage/bufmgr.h
@@ -18,6 +18,7 @@
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilenode.h"
+#include "storage/smgr.h"
#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
@@ -183,6 +184,10 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy);
+extern Buffer ReadBufferWithoutRelcache2(SMgrRelation smgr,
+ ForkNumber forkNum, BlockNumber blockNum,
+ ReadBufferMode mode, BufferAccessStrategy strategy);
+
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
--- src/include/storage/lmgr.h
+++ src/include/storage/lmgr.h
@@ -17,6 +17,7 @@
#include "lib/stringinfo.h"
#include "storage/itemptr.h"
#include "storage/lock.h"
+#include "storage/smgr.h"
#include "utils/rel.h"
@@ -57,6 +58,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation,
LOCKMODE lockmode);
extern int RelationExtensionLockWaiterCount(Relation relation);
+extern void LockSmgrForExtension(SMgrRelation smgr, LOCKMODE lockmode);
+extern void UnlockSmgrForExtension(SMgrRelation smgr, LOCKMODE lockmode);
+
/* Lock a page (currently only used within indexes) */
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
--- src/include/storage/lock.h
+++ src/include/storage/lock.h
@@ -161,7 +161,8 @@ typedef enum LockTagType
* Also, we use DB OID = 0 for shared objects such as tablespaces.
*/
LOCKTAG_USERLOCK, /* reserved for old contrib/userlock code */
- LOCKTAG_ADVISORY /* advisory user locks */
+ LOCKTAG_ADVISORY, /* advisory user locks */
+ LOCKTAG_SMGR /* per-relfilenode lock, see SET_LOCKTAG_SMGR */
} LockTagType;
#define LOCKTAG_LAST_TYPE LOCKTAG_ADVISORY
@@ -207,6 +208,14 @@ typedef struct LOCKTAG
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+#define SET_LOCKTAG_SMGR(locktag,relfilenode) \
+ ((locktag).locktag_field1 = (relfilenode), \
+ (locktag).locktag_field2 = 0, \
+ (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field4 = 0, \
+ (locktag).locktag_type = LOCKTAG_SMGR, \
+ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
--- src/include/storage/smgr.h
+++ src/include/storage/smgr.h
@@ -55,6 +55,7 @@ typedef struct SMgrRelationData
BlockNumber smgr_targblock; /* current insertion target block */
BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */
BlockNumber smgr_vm_nblocks; /* last known size of vm fork */
+ BlockNumber smgr_ptrack_nblocks; /* last known size of ptrack fork */
/* additional public fields may someday exist here */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment