Skip to content

Instantly share code, notes, and snippets.

@lubennikovaav
Created December 12, 2017 12:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lubennikovaav/475a1ac3d394ea08231966a07fecdbde to your computer and use it in GitHub Desktop.
Save lubennikovaav/475a1ac3d394ea08231966a07fecdbde to your computer and use it in GitHub Desktop.
ptrack_10.1_v1.4.patch
commit 1b4003d19a6356aaf22490fc201f30a4dc25585f
Author: Anastasia <a.lubennikova@postgrespro.ru>
Date: Tue Dec 12 15:24:36 2017 +0300
ptrack_10.1_v1.4.patch
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index efebeb0..713a635 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -22,6 +22,7 @@
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "miscadmin.h"
@@ -736,6 +737,7 @@ brinbuildempty(Relation index)
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
/* Initialize and xlog metabuffer. */
+ ptrack_add_block(index, BufferGetBlockNumber(metabuf));
START_CRIT_SECTION();
brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index),
BRIN_CURRENT_VERSION);
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c
index 80f803e..7ed187f 100644
--- a/src/backend/access/brin/brin_pageops.c
+++ b/src/backend/access/brin/brin_pageops.c
@@ -15,6 +15,7 @@
#include "access/brin_revmap.h"
#include "access/brin_xlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
@@ -175,6 +176,7 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
UnlockReleaseBuffer(newbuf);
}
+ ptrack_add_block(idxrel, BufferGetBlockNumber(oldbuf));
START_CRIT_SECTION();
if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) newtup, newsz))
elog(ERROR, "failed to replace BRIN tuple");
@@ -233,6 +235,9 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+ ptrack_add_block(idxrel, BufferGetBlockNumber(newbuf));
+ ptrack_add_block(idxrel, BufferGetBlockNumber(oldbuf));
+ ptrack_add_block(idxrel, BufferGetBlockNumber(revmapbuf));
START_CRIT_SECTION();
/*
@@ -402,6 +407,8 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
blk = BufferGetBlockNumber(*buffer);
/* Execute the actual insertion */
+ ptrack_add_block(idxrel, BufferGetBlockNumber(*buffer));
+ ptrack_add_block(idxrel, BufferGetBlockNumber(revmapbuf));
START_CRIT_SECTION();
if (extended)
brin_page_init(BufferGetPage(*buffer), BRIN_PAGETYPE_REGULAR);
@@ -855,6 +862,7 @@ brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
"brin_initialize_empty_new_buffer: initializing blank page %u",
BufferGetBlockNumber(buffer)));
+ ptrack_add_block(idxrel, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
page = BufferGetPage(buffer);
brin_page_init(page, BRIN_PAGETYPE_REGULAR);
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 22f2076..ac64baa 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -27,6 +27,7 @@
#include "access/brin_xlog.h"
#include "access/rmgr.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
@@ -617,6 +618,8 @@ revmap_physical_extend(BrinRevmap *revmap)
* Ok, we have now locked the metapage and the target block. Re-initialize
* it as a revmap page.
*/
+ ptrack_add_block(irel, BufferGetBlockNumber(buf));
+ ptrack_add_block(irel, BufferGetBlockNumber(revmap->rm_metaBuf));
START_CRIT_SECTION();
/* the rm_tids array is initialized to all invalid by PageInit */
diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c
index 60daa54..c444179 100644
--- a/src/backend/access/brin/brin_xlog.c
+++ b/src/backend/access/brin/brin_xlog.c
@@ -15,6 +15,7 @@
#include "access/brin_xlog.h"
#include "access/bufmask.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
/*
@@ -27,6 +28,11 @@ brin_xlog_createidx(XLogReaderState *record)
xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
Buffer buf;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/* create the index' metapage */
buf = XLogInitBufferForRedo(record, 0);
@@ -51,6 +57,13 @@ brin_xlog_insert_update(XLogReaderState *record,
BlockNumber regpgno;
Page page;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* If we inserted the first and only tuple on the page, re-initialize the
@@ -138,9 +151,15 @@ brin_xlog_update(XLogReaderState *record)
xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
Buffer buffer;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/* First remove the old tuple */
action = XLogReadBufferForRedo(record, 2, &buffer);
+
if (action == BLK_NEEDS_REDO)
{
Page page;
@@ -173,9 +192,15 @@ brin_xlog_samepage_update(XLogReaderState *record)
xl_brin_samepage_update *xlrec;
Buffer buffer;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
action = XLogReadBufferForRedo(record, 0, &buffer);
+
if (action == BLK_NEEDS_REDO)
{
Size tuplen;
@@ -214,14 +239,21 @@ brin_xlog_revmap_extend(XLogReaderState *record)
Page page;
BlockNumber targetBlk;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
- XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &targetBlk);
+ ptrack_add_block_redo(rnode, targetBlk);
+
Assert(xlrec->targetBlk == targetBlk);
/* Update the metapage */
action = XLogReadBufferForRedo(record, 0, &metabuf);
+
if (action == BLK_NEEDS_REDO)
{
Page metapg;
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index b02cb8a..d2e0c27 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -386,6 +387,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
else if (rc == GPTP_INSERT)
{
/* It will fit, perform the insertion */
+ ptrack_add_block(btree->index, BufferGetBlockNumber(stack->buffer));
+ if (BufferIsValid(childbuf))
+ ptrack_add_block(btree->index, BufferGetBlockNumber(childbuf));
START_CRIT_SECTION();
if (RelationNeedsWAL(btree->index))
@@ -535,6 +539,12 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
* the new contents of the root.
*/
+ ptrack_add_block(btree->index, BufferGetBlockNumber(rbuffer));
+ ptrack_add_block(btree->index, BufferGetBlockNumber(stack->buffer));
+ if (stack->parent == NULL)
+ ptrack_add_block(btree->index, BufferGetBlockNumber(lbuffer));
+ if (BufferIsValid(childbuf))
+ ptrack_add_block(btree->index, BufferGetBlockNumber(childbuf));
START_CRIT_SECTION();
MarkBufferDirty(rbuffer);
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 2e5ea47..357f027 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "utils/rel.h"
@@ -835,6 +836,7 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
computeLeafRecompressWALData(leaf);
/* Apply changes to page */
+ ptrack_add_block(indexrel, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buffer, leaf);
@@ -1810,6 +1812,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
page = BufferGetPage(buffer);
blkno = BufferGetBlockNumber(buffer);
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
PageRestoreTempPage(tmppage, page);
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 59e4354..97096b8 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -21,6 +21,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "catalog/pg_am.h"
@@ -69,6 +70,7 @@ writeListPage(Relation index, Buffer buffer,
/* workspace could be a local array; we use palloc for alignment */
workspace = palloc(BLCKSZ);
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
GinInitBuffer(buffer, GIN_LIST);
@@ -295,6 +297,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
/*
* Main list is empty, so just insert sublist as main list
*/
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
START_CRIT_SECTION();
metadata->head = sublist.head;
@@ -318,6 +321,8 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
GinPageGetOpaque(page)->rightlink = sublist.head;
@@ -360,6 +365,8 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
if (needWal)
XLogBeginInsert();
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/*
@@ -555,6 +562,10 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
if (RelationNeedsWAL(index))
XLogEnsureRecordSpace(data.ndeleted, 0);
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
+ for (i = 0; i < data.ndeleted; i++)
+ ptrack_add_block(index, BufferGetBlockNumber(buffers[i]));
+
START_CRIT_SECTION();
metadata->head = blknoToDelete;
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 5378011..56d7258 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -336,6 +337,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
/* initialize the root page */
RootBuffer = GinNewBuffer(index);
+ ptrack_add_block(index, BufferGetBlockNumber(MetaBuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(RootBuffer));
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
@@ -444,6 +447,8 @@ ginbuildempty(Relation index)
LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
/* Initialize and xlog metabuffer and root buffer. */
+ ptrack_add_block(index, BufferGetBlockNumber(MetaBuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(RootBuffer));
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 91e4a8c..5bebf05 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -18,6 +18,7 @@
#include "access/ginxlog.h"
#include "access/reloptions.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
@@ -667,6 +668,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
metapage = BufferGetPage(metabuffer);
metadata = GinPageGetMeta(metapage);
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
START_CRIT_SECTION();
metadata->nTotalPages = stats->nTotalPages;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 31425e9..9ae8daf 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "postmaster/autovacuum.h"
@@ -153,6 +154,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
LockBuffer(lBuffer, GIN_EXCLUSIVE);
+ ptrack_add_block(gvs->index, BufferGetBlockNumber(pBuffer));
+ ptrack_add_block(gvs->index, BufferGetBlockNumber(lBuffer));
+ ptrack_add_block(gvs->index, BufferGetBlockNumber(dBuffer));
START_CRIT_SECTION();
/* Unlink the page by changing left sibling's rightlink */
@@ -630,6 +634,7 @@ ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (resPage)
{
+ ptrack_add_block(gvs.index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
PageRestoreTempPage(resPage, page);
MarkBufferDirty(buffer);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index 92cafe9..d35e4ba 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/ginxlog.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "utils/memutils.h"
static MemoryContext opCtx; /* working memory for operations */
@@ -27,6 +28,11 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
{
@@ -47,8 +53,12 @@ ginRedoCreateIndex(XLogReaderState *record)
Buffer RootBuffer,
MetaBuffer;
Page page;
+ RelFileNode rnode;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
MetaBuffer = XLogInitBufferForRedo(record, 0);
+ ptrack_add_block_redo(rnode, BufferGetBlockNumber(MetaBuffer));
Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
page = (Page) BufferGetPage(MetaBuffer);
@@ -58,6 +68,7 @@ ginRedoCreateIndex(XLogReaderState *record)
MarkBufferDirty(MetaBuffer);
RootBuffer = XLogInitBufferForRedo(record, 1);
+ ptrack_add_block_redo(rnode, BufferGetBlockNumber(RootBuffer));
Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
page = (Page) BufferGetPage(RootBuffer);
@@ -78,8 +89,13 @@ ginRedoCreatePTree(XLogReaderState *record)
char *ptr;
Buffer buffer;
Page page;
+ RelFileNode rnode;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
buffer = XLogInitBufferForRedo(record, 0);
+ ptrack_add_block_redo(rnode, BufferGetBlockNumber(buffer));
+
page = (Page) BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
@@ -331,6 +347,11 @@ ginRedoInsert(XLogReaderState *record)
#endif
BlockNumber rightChildBlkno = InvalidBlockNumber;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* First clear incomplete-split flag on child page if this finishes a
@@ -384,6 +405,18 @@ ginRedoSplit(XLogReaderState *record)
rootbuf;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if (isRoot)
+ {
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
/*
* First clear incomplete-split flag on child page if this finishes a
@@ -417,6 +450,11 @@ static void
ginRedoVacuumPage(XLogReaderState *record)
{
Buffer buffer;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
{
@@ -430,6 +468,11 @@ ginRedoVacuumDataLeafPage(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
@@ -459,6 +502,15 @@ ginRedoDeletePage(XLogReaderState *record)
Buffer pbuffer;
Buffer lbuffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
{
@@ -504,6 +556,13 @@ ginRedoUpdateMetapage(XLogReaderState *record)
Buffer metabuffer;
Page metapage;
Buffer buffer;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* Restore the metapage. This is essentially the same as a full-page
@@ -602,6 +661,11 @@ ginRedoInsertListPage(XLogReaderState *record)
char *payload;
IndexTuple tuples;
Size totaltupsize;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/* We always re-initialize the page. */
buffer = XLogInitBufferForRedo(record, 0);
@@ -651,6 +715,11 @@ ginRedoDeleteListPages(XLogReaderState *record)
Buffer metabuffer;
Page metapage;
int i;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
metabuffer = XLogInitBufferForRedo(record, 0);
Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
@@ -682,6 +751,9 @@ ginRedoDeleteListPages(XLogReaderState *record)
Buffer buffer;
Page page;
+ XLogRecGetBlockTag(record, i+1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+
buffer = XLogInitBufferForRedo(record, i + 1);
page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DELETED);
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 565525b..a23d6c1 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -16,6 +16,7 @@
#include "access/gist_private.h"
#include "access/gistscan.h"
+#include "access/ptrack.h"
#include "catalog/pg_collation.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
@@ -127,6 +128,7 @@ gistbuildempty(Relation index)
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
/* Initialize and xlog buffer */
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
@@ -454,6 +456,10 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
if (RelationNeedsWAL(rel))
XLogEnsureRecordSpace(npage, 1 + npage * 2);
+ for (ptr = dist; ptr; ptr = ptr->next)
+ ptrack_add_block(rel, BufferGetBlockNumber(ptr->buffer));
+ if (BufferIsValid(leftchildbuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(leftchildbuf));
START_CRIT_SECTION();
/*
@@ -503,6 +509,9 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
/*
* Enough space. We always get here if ntup==0.
*/
+ ptrack_add_block(rel, BufferGetBlockNumber(buffer));
+ if (BufferIsValid(leftchildbuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(leftchildbuf));
START_CRIT_SECTION();
/*
@@ -1544,6 +1553,7 @@ gistvacuumpage(Relation rel, Page page, Buffer buffer)
if (ndeletable > 0)
{
+ ptrack_add_block(rel, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
PageIndexMultiDelete(page, deletable, ndeletable);
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 4756a70..da38813 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -20,6 +20,7 @@
#include "access/gist_private.h"
#include "access/gistxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "optimizer/cost.h"
@@ -172,6 +173,7 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = BufferGetPage(buffer);
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 77d9d12..c866c91 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -16,6 +16,7 @@
#include "access/genam.h"
#include "access/gist_private.h"
+#include "access/ptrack.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
@@ -212,6 +213,7 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
if (ntodelete)
{
+ ptrack_add_block(rel, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
MarkBufferDirty(buffer);
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 7fd91ce..67a3b55 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -18,6 +18,7 @@
#include "access/gistxlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "utils/memutils.h"
static MemoryContext opCtx; /* working memory for operations */
@@ -40,6 +41,11 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
Buffer buffer;
Page page;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* Note that we still update the page even if it was restored from a full
@@ -70,6 +76,11 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
@@ -198,6 +209,7 @@ gistRedoPageSplitRecord(XLogReaderState *record)
int i;
bool isrootsplit = false;
+
/*
* We must hold lock on the first-listed page throughout the action,
* including while updating the left child page (if any). We can unlock
@@ -215,8 +227,11 @@ gistRedoPageSplitRecord(XLogReaderState *record)
int num;
BlockNumber blkno;
IndexTuple *tuples;
+ RelFileNode rnode;
+
+ XLogRecGetBlockTag(record, i + 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
- XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
if (blkno == GIST_ROOT_BLKNO)
{
Assert(i == 0);
@@ -287,6 +302,11 @@ gistRedoCreateIndex(XLogReaderState *record)
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index b83d496..788c55c 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/access/heap
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o ptrack.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b41f2a2..dc8f5e3 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -54,6 +54,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "miscadmin.h"
@@ -2437,6 +2438,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
/* NO EREPORT(ERROR) from here till changes are logged */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
RelationPutHeapTuple(relation, buffer, heaptup,
@@ -2732,6 +2734,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
page = BufferGetPage(buffer);
/* NO EREPORT(ERROR) from here till changes are logged */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/*
@@ -3248,6 +3251,7 @@ l1:
xid, LockTupleExclusive, true,
&new_xmax, &new_infomask, &new_infomask2);
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/*
@@ -4008,6 +4012,7 @@ l2:
Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
@@ -4182,6 +4187,9 @@ l2:
&old_key_copied);
/* NO EREPORT(ERROR) from here till changes are logged */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
+ if (newbuf != buffer)
+ ptrack_add_block(relation, BufferGetBlockNumber(newbuf));
START_CRIT_SECTION();
/*
@@ -5092,6 +5100,7 @@ failed:
GetCurrentTransactionId(), mode, false,
&xid, &new_infomask, &new_infomask2);
+ ptrack_add_block(relation, BufferGetBlockNumber(*buffer));
START_CRIT_SECTION();
/*
@@ -5865,6 +5874,7 @@ l4:
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
/* ... and set them */
@@ -6020,6 +6030,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple)
"invalid speculative token constant");
/* NO EREPORT(ERROR) from here till changes are logged */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
Assert(HeapTupleHeaderIsSpeculative(tuple->t_data));
@@ -6133,6 +6144,7 @@ heap_abort_speculative(Relation relation, HeapTuple tuple)
* do anything special with infomask bits.
*/
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/*
@@ -6266,6 +6278,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
elog(ERROR, "wrong tuple length");
/* NO EREPORT(ERROR) from here till changes are logged */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
memcpy((char *) htup + htup->t_hoff,
@@ -7968,6 +7981,7 @@ heap_xlog_clean(XLogReaderState *record)
XLogRedoAction action;
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -8059,6 +8073,7 @@ heap_xlog_visible(XLogReaderState *record)
XLogRedoAction action;
XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* If there are any Hot Standby transactions running that have an xmin
@@ -8169,6 +8184,11 @@ heap_xlog_freeze_page(XLogReaderState *record)
TransactionId cutoff_xid = xlrec->cutoff_xid;
Buffer buffer;
int ntup;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* In Hot Standby mode, ensure that there's no queries running which still
@@ -8254,6 +8274,7 @@ heap_xlog_delete(XLogReaderState *record)
ItemPointerData target_tid;
XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ptrack_add_block_redo(target_node, blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
@@ -8332,6 +8353,7 @@ heap_xlog_insert(XLogReaderState *record)
XLogRedoAction action;
XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ptrack_add_block_redo(target_node, blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
@@ -8454,6 +8476,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -8600,8 +8623,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
oldtup.t_len = 0;
XLogRecGetBlockTag(record, 0, &rnode, NULL, &newblk);
+ ptrack_add_block_redo(rnode, newblk);
if (XLogRecGetBlockTag(record, 1, NULL, NULL, &oldblk))
{
+ ptrack_add_block_redo(rnode, oldblk);
/* HOT updates are never done across pages */
Assert(!hot_update);
}
@@ -8847,6 +8872,11 @@ heap_xlog_confirm(XLogReaderState *record)
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
@@ -8883,6 +8913,11 @@ heap_xlog_lock(XLogReaderState *record)
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* The visibility map may need to be fixed even if the heap page is
@@ -8954,6 +8989,11 @@ heap_xlog_lock_updated(XLogReaderState *record)
OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);
@@ -9016,6 +9056,11 @@ heap_xlog_inplace(XLogReaderState *record)
HeapTupleHeader htup;
uint32 oldlen;
Size newlen;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 52231ac..57aa4f4 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -19,6 +19,7 @@
#include "access/transam.h"
#include "access/htup_details.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -227,6 +228,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
}
/* Any error while applying the changes is critical */
+ ptrack_add_block(relation, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/* Have we found any prunable items? */
diff --git a/src/backend/access/heap/ptrack.c b/src/backend/access/heap/ptrack.c
new file mode 100644
index 0000000..5ac962f
--- /dev/null
+++ b/src/backend/access/heap/ptrack.c
@@ -0,0 +1,647 @@
+/*-------------------------------------------------------------------------
+ *
+ * ptrack.c
+ * bitmap for tracking updates of relation's pages
+ *
+ * IDENTIFICATION
+ * src/backend/access/heap/ptrack.c
+ *
+ * INTERFACE ROUTINES (PostgreSQL side)
+ *
+ * ptrack_add_block - set a bit to track dirtied page
+ * ptrack_add_block_redo - set a bit to track recovered page
+ * create_ptrack_init_file - create PTRACK_INIT_FILE
+ * in the given database directory
+ *
+ * EXTERNAL INTERFACE ROUTINES (Backup utility side)
+ * ptrack_version() - Returns PTRACK version currently in use.
+ * pg_ptrack_control_lsn() - Gets LSN from ptrack_control file.
+ * pg_ptrack_clear() - Resets bits in all PTRACK files.
+ * This function must be called for each database in the cluster.
+ * pg_ptrack_get_and_clear(Oid tablespace_oid, Oid table_oid) -
+ * Reads a PTRACK file for the given relation and resets it.
+ * Returns the PTRACK content as bytea. It is essential to receive and clear the map
+ * atomically in order to avoid losing PTRACK bits because of race conditions.
+ * (Imagine that your backup tool reads the map, then some blocks of the relation are
+ * updated and the ptrack bits are set, after that the backup tool cleans up the map
+ * and resets ptrack_clear_lsn. So, we may lose some of the updates).
+ * This function must be called for each database in the cluster.
+ * pg_ptrack_get_and_clear_db(Oid db_oid, Oid tablespace_oid) -
+ * Checks whether PTRACK_INIT_FILE exists in the given database and deletes it.
+ * Returns true if the file was found. This function is analogous to
+ * pg_ptrack_get_and_clear(), but it handles directory-level changes
+ * (i.e. CREATE DATABASE, ALTER DATABASE SET TABLESPACE).
+ * This function must be called for each database in the cluster.
+ *
+ */
+
+#include "postgres.h"
+
+#include "access/heapam_xlog.h"
+#include "access/heapam.h"
+#include "access/ptrack.h"
+#include "access/xlog.h"
+#include "access/xlogutils.h"
+#include "access/skey.h"
+#include "access/genam.h"
+#include "access/generic_xlog.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_tablespace.h"
+#include "access/htup_details.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "utils/inval.h"
+#include "utils/array.h"
+#include "utils/relfilenodemap.h"
+#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
+#include <unistd.h>
+#include <sys/stat.h>
+
+/* Bytes of one ptrack map page usable for bitmap data: block size minus the max-aligned page header */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte of the map. */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / PTRACK_BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one ptrack map page (MAPSIZE bytes of bitmap). */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the map page, the byte within that page's bitmap, and the bit within that byte */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+/* NOTE The macro below yields the block's position within its map byte, which is a bit offset only while PTRACK_BITS_PER_HEAPBLOCK is 1; update it if that constant is increased */
+#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
+
+bool ptrack_enable = false; /* ptrack_add_block() sets no bits while this is false */
+
+static Buffer ptrack_readbuf(Relation rel, BlockNumber blkno, bool extend);
+static void ptrack_extend(Relation rel, BlockNumber nvmblocks);
+static void ptrack_set(BlockNumber heapBlk, Buffer ptrackBuf);
+
+void SetPtrackClearLSN(bool set_invalid);
+
+Datum pg_ptrack_clear(PG_FUNCTION_ARGS);
+Datum pg_ptrack_get_and_clear(PG_FUNCTION_ARGS);
+Datum pg_ptrack_get_and_clear_db(PG_FUNCTION_ARGS);
+Datum pg_ptrack_control_lsn(PG_FUNCTION_ARGS);
+
+static void drop_ptrack_init_file(char *dest_dir);
+
+/*
+ * Mark a block as changed while replaying WAL.
+ *
+ * We should not miss any recovery actions, including recovery from
+ * full-page writes.
+ *
+ *	rnode	- physical identifier of the relation the WAL record refers to
+ *	heapBlk	- number of the block to mark in that relation's ptrack map
+ *
+ * A fake relcache entry is built from rnode so that the regular
+ * ptrack_add_block() path can be reused.  ptrack_add_block() sets no bits
+ * while ptrack_enable is off, so in that case return right away instead of
+ * allocating and freeing a fake entry for every replayed block.
+ */
+void
+ptrack_add_block_redo(RelFileNode rnode, BlockNumber heapBlk)
+{
+	Relation	reln;
+
+	if (!ptrack_enable)
+		return;
+
+	reln = CreateFakeRelcacheEntry(rnode);
+	ptrack_add_block(reln, heapBlk);
+	FreeFakeRelcacheEntry(reln);
+}
+
+/*
+ * Set the ptrack bit for block heapBlk of relation rel.
+ *
+ * Nothing is done for relations that are not permanent (unlogged and temp
+ * relations are not backed up anyway) or while ptrack_enable is off.
+ * Otherwise the map page covering heapBlk is read, extending the ptrack
+ * fork if necessary, the bit is set, and the buffer pin is released.
+ */
+void
+ptrack_add_block(Relation rel, BlockNumber heapBlk)
+{
+	Buffer		mapbuf;
+
+	/* Unlogged and temp relations are never tracked. */
+	if (rel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT)
+		return;
+
+	/* Tracking is switched off. */
+	if (!ptrack_enable)
+		return;
+
+	mapbuf = ptrack_readbuf(rel, HEAPBLK_TO_MAPBLOCK(heapBlk), true);
+	ptrack_set(heapBlk, mapbuf);
+	ReleaseBuffer(mapbuf);
+}
+
+/*
+ * Set the bit that represents heapBlk in the ptrack map page held in
+ * ptrackBuf.
+ *
+ * The content lock on ptrackBuf is taken and released here.  The bit is
+ * first tested under a share lock; only when it is still clear is an
+ * exclusive lock taken, under which the bit is tested again and, if needed,
+ * set and the buffer marked dirty.
+ */
+static void
+ptrack_set(BlockNumber heapBlk, Buffer ptrackBuf)
+{
+	char	   *bits = PageGetContents(BufferGetPage(ptrackBuf));
+	uint32		byteno = HEAPBLK_TO_MAPBYTE(heapBlk);
+	uint8		bitno = HEAPBLK_TO_MAPBIT(heapBlk);
+	bool		already_set;
+
+	/* Common case: a share lock is enough to see that the bit is set. */
+	LockBuffer(ptrackBuf, BUFFER_LOCK_SHARE);
+	already_set = (bits[byteno] & (1 << bitno)) != 0;
+	LockBuffer(ptrackBuf, BUFFER_LOCK_UNLOCK);
+
+	if (already_set)
+		return;
+
+	/*
+	 * The share lock has been dropped; take an exclusive one.  The bit could
+	 * have been set concurrently in between, hence the second test.
+	 */
+	LockBuffer(ptrackBuf, BUFFER_LOCK_EXCLUSIVE);
+
+	if ((bits[byteno] & (1 << bitno)) == 0)
+	{
+		START_CRIT_SECTION();
+
+		bits[byteno] |= (1 << bitno);
+		MarkBufferDirty(ptrackBuf);
+
+		/* No WAL record is written for ptrack; pages are marked on recovery. */
+		END_CRIT_SECTION();
+	}
+
+	LockBuffer(ptrackBuf, BUFFER_LOCK_UNLOCK);
+}
+
+/*
+ * Read page blkno of rel's ptrack map and return its buffer.
+ *
+ * When the page lies at or beyond the current end of the ptrack fork, the
+ * fork is extended if 'extend' is true; otherwise InvalidBuffer is returned.
+ */
+static Buffer
+ptrack_readbuf(Relation rel, BlockNumber blkno, bool extend)
+{
+	Buffer		mapbuf;
+	Page		mappage;
+
+	/*
+	 * The relation may not be open at the smgr level yet, or a sinval
+	 * message may have forced it closed.  With extend = false a recent
+	 * extension is not necessarily noticed here at once; sinval messages are
+	 * relied upon to keep our idea of the map size reasonably fresh.
+	 */
+	RelationOpenSmgr(rel);
+
+	/* Fill in the cached fork size if unknown; a missing fork counts as empty. */
+	if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+		rel->rd_smgr->smgr_ptrack_nblocks =
+			smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM) ?
+			smgrnblocks(rel->rd_smgr, PAGESTRACK_FORKNUM) : 0;
+
+	/* Requests beyond EOF either grow the fork or report "no page". */
+	if (blkno >= rel->rd_smgr->smgr_ptrack_nblocks)
+	{
+		if (!extend)
+			return InvalidBuffer;
+
+		ptrack_extend(rel, blkno + 1);
+	}
+
+	/* We should never miss updated pages, so error out if page is corrupted */
+	mapbuf = ReadBufferExtended(rel, PAGESTRACK_FORKNUM, blkno,
+								RBM_NORMAL, NULL);
+
+	/* Give a page that still looks new a valid page header. */
+	mappage = BufferGetPage(mapbuf);
+	if (PageIsNew(mappage))
+		PageInit(mappage, BLCKSZ, 0);
+
+	return mapbuf;
+}
+
+/*
+ * Make sure rel's ptrack map fork is at least ptrack_nblocks pages long,
+ * creating the fork and appending initialized empty pages as required.
+ */
+static void
+ptrack_extend(Relation rel, BlockNumber ptrack_nblocks)
+{
+	Page		emptypage = (Page) palloc(BLCKSZ);
+	BlockNumber	cached_nblocks;
+	BlockNumber	cur_nblocks;
+
+	PageInit(emptypage, BLCKSZ, 0);
+
+	/*
+	 * The relation extension lock keeps other backends from extending the
+	 * ptrack map at the same time.  It needlessly locks out extension of the
+	 * main fork too, but map extension is rare enough that a separate lock
+	 * tag type does not seem worthwhile.
+	 *
+	 * By the time the lock is granted another backend may already have
+	 * created or extended the fork.
+	 */
+	LockRelationForExtension(rel, ExclusiveLock);
+
+	/* A cache flush may have closed the smgr relation; re-open it. */
+	RelationOpenSmgr(rel);
+
+	/*
+	 * Create the fork if it does not exist.  A positive cached size implies
+	 * that it exists, so smgrexists() is consulted only when the cached size
+	 * is zero or unknown.
+	 */
+	cached_nblocks = rel->rd_smgr->smgr_ptrack_nblocks;
+	if (cached_nblocks == 0 || cached_nblocks == InvalidBlockNumber)
+	{
+		if (!smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM))
+			smgrcreate(rel->rd_smgr, PAGESTRACK_FORKNUM, false);
+	}
+
+	/* Append pages one by one until the requested length is reached. */
+	for (cur_nblocks = smgrnblocks(rel->rd_smgr, PAGESTRACK_FORKNUM);
+		 cur_nblocks < ptrack_nblocks;
+		 cur_nblocks++)
+	{
+		PageSetChecksumInplace(emptypage, cur_nblocks);
+		smgrextend(rel->rd_smgr, PAGESTRACK_FORKNUM, cur_nblocks,
+				   (char *) emptypage, false);
+	}
+
+	/*
+	 * Send a shared-inval message so that other backends close their smgr
+	 * references to this rel.  That spares them from repeatedly checking for
+	 * creation or extension of the file, which happens infrequently.
+	 */
+	CacheInvalidateSmgr(rel->rd_smgr->smgr_rnode);
+
+	/* Remember the up-to-date size locally. */
+	rel->rd_smgr->smgr_ptrack_nblocks = cur_nblocks;
+
+	UnlockRelationForExtension(rel, ExclusiveLock);
+
+	pfree(emptypage);
+}
+
+
+
+/*
+ * Zero the bitmap area of every page in the ptrack map of relation relid.
+ *
+ * The relation is opened with AccessShareLock, and the relation extension
+ * lock is held while the map pages are walked.
+ */
+static void
+ptrack_clear_one_rel(Oid relid)
+{
+	Relation	rel = relation_open(relid, AccessShareLock);
+	BlockNumber	mapblk = 0;
+
+	RelationOpenSmgr(rel);
+
+	if (rel->rd_smgr != NULL)
+	{
+		LockRelationForExtension(rel, ExclusiveLock);
+
+		/* Fill in the cached fork size if unknown; a missing fork is empty. */
+		if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+			rel->rd_smgr->smgr_ptrack_nblocks =
+				smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM) ?
+				smgrnblocks(rel->rd_smgr, PAGESTRACK_FORKNUM) : 0;
+
+		while (mapblk < rel->rd_smgr->smgr_ptrack_nblocks)
+		{
+			Buffer		mapbuf = ReadBufferExtended(rel, PAGESTRACK_FORKNUM,
+													mapblk, RBM_ZERO_ON_ERROR,
+													NULL);
+			char	   *bits = PageGetContents(BufferGetPage(mapbuf));
+
+			LockBuffer(mapbuf, BUFFER_LOCK_EXCLUSIVE);
+
+			START_CRIT_SECTION();
+			MemSet(bits, 0, MAPSIZE);
+			MarkBufferDirty(mapbuf);
+			END_CRIT_SECTION();
+
+			UnlockReleaseBuffer(mapbuf);
+			mapblk++;
+		}
+
+		UnlockRelationForExtension(rel, ExclusiveLock);
+	}
+
+	relation_close(rel, AccessShareLock);
+}
+
+/*
+ * Clear the ptrack map of every relation found by scanning the catalog
+ * opened via RelationRelationId, then store the current WAL insert position
+ * in the ptrack control file.
+ */
+void
+ptrack_clear(void)
+{
+	Relation	catalog;
+	SysScanDesc	scan;
+	HeapTuple	tuple;
+
+	catalog = heap_open(RelationRelationId, AccessShareLock);
+	scan = systable_beginscan(catalog, InvalidOid, false, NULL, 0, NULL);
+
+	for (tuple = systable_getnext(scan);
+		 HeapTupleIsValid(tuple);
+		 tuple = systable_getnext(scan))
+		ptrack_clear_one_rel(HeapTupleGetOid(tuple));
+
+	systable_endscan(scan);
+	heap_close(catalog, AccessShareLock);
+
+	/* From this LSN on, all changes are known to be tracked. */
+	SetPtrackClearLSN(false);
+}
+
+/*
+ * Get ptrack file as bytea and clear it.
+ *
+ *	tablespace_oid, table_oid - tablespace and relfilenode of the relation;
+ *		they are resolved to a relation OID with RelidByRelfilenode().
+ *
+ * Returns a palloc'd bytea holding MAPSIZE bytes of bitmap for each page of
+ * the relation's ptrack fork, in page order.  A header-only (empty) bytea is
+ * returned when table_oid is invalid or the relation cannot be found (both
+ * with a WARNING), or when the relation has no ptrack pages.
+ *
+ * Each map page is copied and zeroed under one exclusive buffer lock, so a
+ * bit cannot be set between the copy and the reset of that page.
+ */
+bytea *
+ptrack_get_and_clear(Oid tablespace_oid, Oid table_oid)
+{
+	bytea	   *result = NULL;
+	BlockNumber	nblock;
+	BlockNumber	nblocks;
+	Relation	rel;
+
+	/*
+	 * Reject an invalid relfilenode before doing any lookup: there is nothing
+	 * to resolve, and no relcache reference must be taken on this path since
+	 * full_end does not release one.
+	 */
+	if (table_oid == InvalidOid)
+	{
+		elog(WARNING, "InvalidOid");
+		goto full_end;
+	}
+
+	rel = RelationIdGetRelation(RelidByRelfilenode(tablespace_oid, table_oid));
+	if (rel == InvalidRelation)
+	{
+		elog(WARNING, "InvalidRelation");
+		goto full_end;
+	}
+
+	RelationOpenSmgr(rel);
+	if (rel->rd_smgr == NULL)
+		goto end_rel;
+
+	LockRelationForExtension(rel, ExclusiveLock);
+
+	/* Fill in the cached fork size if unknown; a missing fork is empty. */
+	if (rel->rd_smgr->smgr_ptrack_nblocks == InvalidBlockNumber)
+	{
+		if (smgrexists(rel->rd_smgr, PAGESTRACK_FORKNUM))
+			rel->rd_smgr->smgr_ptrack_nblocks = smgrnblocks(rel->rd_smgr,
+															PAGESTRACK_FORKNUM);
+		else
+			rel->rd_smgr->smgr_ptrack_nblocks = 0;
+	}
+
+	nblocks = rel->rd_smgr->smgr_ptrack_nblocks;
+	if (nblocks == 0)
+	{
+		UnlockRelationForExtension(rel, ExclusiveLock);
+		goto end_rel;
+	}
+
+	result = (bytea *) palloc(nblocks * MAPSIZE + VARHDRSZ);
+	SET_VARSIZE(result, nblocks * MAPSIZE + VARHDRSZ);
+
+	for (nblock = 0; nblock < nblocks; nblock++)
+	{
+		Buffer		buf = ReadBufferExtended(rel, PAGESTRACK_FORKNUM,
+											 nblock, RBM_ZERO_ON_ERROR, NULL);
+		Page		page = BufferGetPage(buf);
+		char	   *map = PageGetContents(page);
+
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+		/* Copy and reset under the same lock so no bit is lost in between. */
+		START_CRIT_SECTION();
+		memcpy(VARDATA(result) + nblock * MAPSIZE, map, MAPSIZE);
+		MemSet(map, 0, MAPSIZE);
+		MarkBufferDirty(buf);
+		END_CRIT_SECTION();
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	UnlockRelationForExtension(rel, ExclusiveLock);
+
+end_rel:
+	RelationClose(rel);
+
+	/*
+	 * Update ptrack_enabled_lsn to know
+	 * that we track all changes since this LSN.
+	 */
+	SetPtrackClearLSN(false);
+
+full_end:
+	if (result == NULL)
+	{
+		result = palloc0(VARHDRSZ);
+		SET_VARSIZE(result, VARHDRSZ);
+	}
+
+	return result;
+}
+
+
+/*
+ * Rewrite the LSN stored in the global/ptrack_control file.
+ *
+ * With set_invalid = true the stored value becomes InvalidXLogRecPtr;
+ * assign_ptrack_enable() uses this when ptrack_enable is turned off.
+ * Otherwise the current WAL insert position is stored; this is done after
+ * the ptrack maps have been cleared, to know that all changes since this
+ * LSN are tracked.
+ *
+ * Judging by this value, we can say whether it is legal to perform an
+ * incremental ptrack backup, or whether the ptrack mapping has been lost
+ * since the previous backup and a full backup must be done now.
+ *
+ * Any failure to create, write, fsync or close the file is a PANIC.
+ */
+void
+SetPtrackClearLSN(bool set_invalid)
+{
+	char		file_path[MAXPGPATH];
+	XLogRecPtr	ptrack_enabled_lsn;
+	int			fd;
+
+	if (set_invalid)
+		ptrack_enabled_lsn = InvalidXLogRecPtr;
+	else
+		ptrack_enabled_lsn = GetXLogInsertRecPtr();
+
+	join_path_components(file_path, DataDir, "global/ptrack_control");
+	canonicalize_path(file_path);
+
+	fd = BasicOpenFile(file_path,
+					   O_RDWR | O_CREAT | PG_BINARY,
+					   S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not create ptrack control file \"%s\": %m",
+						"global/ptrack_control")));
+	}
+
+	/* Clear errno so a short write that leaves it untouched can be detected. */
+	errno = 0;
+	if (write(fd, &ptrack_enabled_lsn, sizeof(XLogRecPtr)) != sizeof(XLogRecPtr))
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not write to ptrack control file: %m")));
+	}
+
+	if (pg_fsync(fd) != 0)
+	{
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync ptrack control file: %m")));
+	}
+
+	if (close(fd) != 0)
+	{
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg("could not close ptrack control file: %m")));
+	}
+}
+
+/*
+ * Assign hook for ptrack_enable.
+ *
+ * When the setting is being turned off, reset the LSN in the ptrack_control
+ * file to invalid, to know that it is illegal to perform an incremental
+ * ptrack backup.  Nothing is written while DataDir is not set yet or in
+ * bootstrap processing mode.  'extra' is unused.
+ */
+void
+assign_ptrack_enable(bool newval, void *extra)
+{
+	/* Turning tracking on requires no action here. */
+	if (newval)
+		return;
+
+	/* The control file cannot be written in these states. */
+	if (DataDir == NULL || IsBootstrapProcessingMode())
+		return;
+
+	SetPtrackClearLSN(true);
+}
+
+/*
+ * SQL-callable wrapper around ptrack_clear().
+ *
+ * Raises ERRCODE_INSUFFICIENT_PRIVILEGE unless the caller is a superuser or
+ * has the replication attribute.  Returns void.
+ */
+Datum
+pg_ptrack_clear(PG_FUNCTION_ARGS)
+{
+	bool		permitted = superuser() || has_rolreplication(GetUserId());
+
+	if (!permitted)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+	}
+
+	ptrack_clear();
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable wrapper around ptrack_get_and_clear().
+ *
+ * Args: tablespace OID and relfilenode, passed through unchanged.
+ * Returns the relation's ptrack map as bytea; the map is cleared as a side
+ * effect.  Raises ERRCODE_INSUFFICIENT_PRIVILEGE unless the caller is a
+ * superuser or has the replication attribute.
+ */
+Datum
+pg_ptrack_get_and_clear(PG_FUNCTION_ARGS)
+{
+	Oid			tablespace_oid;
+	Oid			table_oid;
+	bytea	   *map;
+
+	if (!(superuser() || has_rolreplication(GetUserId())))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+	}
+
+	tablespace_oid = PG_GETARG_OID(0);
+	table_oid = PG_GETARG_OID(1);
+	map = ptrack_get_and_clear(tablespace_oid, table_oid);
+
+	PG_RETURN_BYTEA_P(map);
+}
+
+/*
+ * Check if PTRACK_INIT_FILE exists in the given database
+ * and delete it.
+ * Args: dbOid and tblspcOid
+ * Return true if the file existed as a regular file, false if it does not
+ * exist or is not a regular file.
+ *
+ * Raises ERRCODE_INSUFFICIENT_PRIVILEGE unless the caller is a superuser or
+ * has the replication attribute, and a file-access error when stat() fails
+ * for any reason other than the file being absent.
+ */
+Datum
+pg_ptrack_get_and_clear_db(PG_FUNCTION_ARGS)
+{
+	char	   *db_path;
+	struct stat	st;
+	char		ptrack_init_file_path[MAXPGPATH];
+
+	/* Check privileges before building any path from the arguments. */
+	if (!superuser() && !has_rolreplication(GetUserId()))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role to clear ptrack files"))));
+
+	db_path = GetDatabasePath(PG_GETARG_OID(0), PG_GETARG_OID(1));
+	snprintf(ptrack_init_file_path, sizeof(ptrack_init_file_path), "%s/%s", db_path, PTRACK_INIT_FILE);
+
+	if (stat(ptrack_init_file_path, &st) != 0)
+	{
+		/*
+		 * The stat buffer is not filled in on failure, so it must not be
+		 * inspected.  Only a missing file means "no init file"; any other
+		 * failure leaves the answer unknown and is reported rather than
+		 * guessed.
+		 */
+		if (errno != ENOENT)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not stat file \"%s\": %m",
+							ptrack_init_file_path)));
+
+		PG_RETURN_BOOL(false);
+	}
+
+	if (!S_ISREG(st.st_mode))
+		PG_RETURN_BOOL(false);
+
+	drop_ptrack_init_file(db_path);
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * Create PTRACK_INIT_FILE in directory dest_dir.
+ *
+ * The file allows tracking directory-level changes made by operations which
+ * do not go through Shared Buffers.  An already existing file is left in
+ * place; any other failure to create or close it raises ERROR.
+ */
+void
+create_ptrack_init_file(char *dest_dir)
+{
+	char		ptrack_init_file_path[MAXPGPATH];
+	int			dstfd;
+
+	snprintf(ptrack_init_file_path, sizeof(ptrack_init_file_path), "%s/%s", dest_dir, PTRACK_INIT_FILE);
+
+	dstfd = OpenTransientFile(ptrack_init_file_path,
+							  O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+							  S_IRUSR | S_IWUSR);
+
+	if (dstfd < 0)
+	{
+		/* O_EXCL makes an existing file show up as EEXIST; that is fine. */
+		if (errno != EEXIST)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create file \"%s\": %m", ptrack_init_file_path)));
+		return;
+	}
+
+	if (CloseTransientFile(dstfd))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", ptrack_init_file_path)));
+}
+
+/*
+ * Delete PTRACK_INIT_FILE from directory dest_dir.
+ *
+ * A missing file is silently accepted; any other unlink failure is reported
+ * as a WARNING only.  The definition is marked static to agree with the
+ * file-local prototype of this function.
+ */
+static void
+drop_ptrack_init_file(char *dest_dir)
+{
+	char		ptrack_init_file_path[MAXPGPATH];
+
+	snprintf(ptrack_init_file_path, sizeof(ptrack_init_file_path), "%s/%s", dest_dir, PTRACK_INIT_FILE);
+
+	if (unlink(ptrack_init_file_path) != 0 && errno != ENOENT)
+		ereport(WARNING,
+				(errcode_for_file_access(),
+				 errmsg("could not remove file \"%s\": %m", ptrack_init_file_path)));
+}
+
+/*
+ * SQL-callable function returning PTRACK_VERSION, the ptrack version
+ * currently in use, as text.
+ */
+PG_FUNCTION_INFO_V1(ptrack_version);
+Datum
+ptrack_version(PG_FUNCTION_ARGS)
+{
+	text	   *version = cstring_to_text(PTRACK_VERSION);
+
+	PG_RETURN_TEXT_P(version);
+}
+
+/*
+ * Get lsn from ptrack_control file.
+ *
+ * Returns the XLogRecPtr stored at the start of global/ptrack_control.
+ * Raises ERRCODE_INSUFFICIENT_PRIVILEGE unless the caller is a superuser or
+ * has the replication attribute, and ERROR when the file cannot be opened
+ * or does not yield a complete LSN.
+ *
+ * The file is opened with OpenTransientFile(), as create_ptrack_init_file()
+ * does, so that the descriptor is not leaked when one of the ERROR reports
+ * below is raised.
+ */
+Datum
+pg_ptrack_control_lsn(PG_FUNCTION_ARGS)
+{
+	int			fd;
+	int			nread;
+	char		file_path[MAXPGPATH];
+	XLogRecPtr	lsn = 0;
+
+	if (!superuser() && !has_rolreplication(GetUserId()))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser or replication role read ptrack files"))));
+
+	join_path_components(file_path, DataDir, "global/ptrack_control");
+	canonicalize_path(file_path);
+
+	fd = OpenTransientFile(file_path, O_RDONLY | PG_BINARY, 0);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\" for reading: %m",
+						file_path)));
+
+	nread = read(fd, &lsn, sizeof(XLogRecPtr));
+
+	/* A failed read has a meaningful errno ... */
+	if (nread < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read content of the file \"%s\" %m",
+						file_path)));
+
+	/* ... a short read does not, so report the byte counts instead of %m. */
+	if (nread != (int) sizeof(XLogRecPtr))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("could not read content of the file \"%s\": read %d of %zu bytes",
+						file_path, nread, sizeof(XLogRecPtr))));
+
+	CloseTransientFile(fd);
+
+	PG_RETURN_LSN(lsn);
+}
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bd560e4..a74971e 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -114,6 +114,7 @@
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
@@ -332,11 +333,15 @@ end_heap_rewrite(RewriteState state)
if (state->rs_buffer_valid)
{
if (state->rs_use_wal)
+ {
+ /* Don't forget to set ptrack bit even if we're skipping bufmgr stage */
+ ptrack_add_block(state->rs_new_rel, state->rs_blockno);
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
state->rs_buffer,
true);
+ }
RelationOpenSmgr(state->rs_new_rel);
PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
@@ -681,11 +686,15 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
/* XLOG stuff */
if (state->rs_use_wal)
+ {
+ /* Don't forget to set ptrack bit even if we're skipping bufmgr stage */
+ ptrack_add_block(state->rs_new_rel, state->rs_blockno);
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
page,
true);
+ }
/*
* Now write the page. We say isTemp = true even if it's not a
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 4aca7e4..68a482a 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -20,6 +20,7 @@
#include "access/nbtxlog.h"
#include "access/transam.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
@@ -838,6 +839,11 @@ _bt_insertonpg(Relation rel,
}
/* Do the update. No ereport(ERROR) until changes are logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
+ if (BufferIsValid(metabuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(metabuf));
+ if (BufferIsValid(cbuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(cbuf));
START_CRIT_SECTION();
if (!_bt_pgaddtup(page, itemsz, itup, newitemoff))
@@ -1224,6 +1230,12 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
* not starting the critical section till here because we haven't been
* scribbling on the original page yet; see comments above.
*/
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
+ ptrack_add_block(rel, BufferGetBlockNumber(rbuf));
+ if (!P_RIGHTMOST(ropaque))
+ ptrack_add_block(rel, BufferGetBlockNumber(sbuf));
+ if (BufferIsValid(cbuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(cbuf));
START_CRIT_SECTION();
/*
@@ -1976,6 +1988,9 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
ItemPointerSet(&(right_item->t_tid), rbkno, P_HIKEY);
/* NO EREPORT(ERROR) from here till newroot op is logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(lbuf));
+ ptrack_add_block(rel, BufferGetBlockNumber(rootbuf));
+ ptrack_add_block(rel, BufferGetBlockNumber(metabuf));
START_CRIT_SECTION();
/* set btree special data */
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 5c817b6..7eee744 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -27,6 +27,7 @@
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
@@ -222,6 +223,8 @@ _bt_getroot(Relation rel, int access)
rootopaque->btpo_cycleid = 0;
/* NO ELOG(ERROR) till meta is updated */
+ ptrack_add_block(rel, BufferGetBlockNumber(rootbuf));
+ ptrack_add_block(rel, BufferGetBlockNumber(metabuf));
START_CRIT_SECTION();
metad->btm_root = rootblkno;
@@ -794,6 +797,7 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
BTPageOpaque opaque;
/* No ereport(ERROR) until changes are logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
/* Fix the page */
@@ -870,6 +874,7 @@ _bt_delitems_delete(Relation rel, Buffer buf,
Assert(nitems > 0);
/* No ereport(ERROR) until changes are logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
/* Fix the page */
@@ -1410,6 +1415,8 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
PredicateLockPageCombine(rel, leafblkno, leafrightsib);
/* No ereport(ERROR) until changes are logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(topparent));
+ ptrack_add_block(rel, BufferGetBlockNumber(leafbuf));
START_CRIT_SECTION();
/*
@@ -1732,6 +1739,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
*/
/* No ereport(ERROR) until changes are logged */
+ ptrack_add_block(rel, BufferGetBlockNumber(rbuf));
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
+ if (BufferIsValid(lbuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(lbuf));
+ if (BufferIsValid(metabuf))
+ ptrack_add_block(rel, BufferGetBlockNumber(metabuf));
+ if (target != leafblkno)
+ ptrack_add_block(rel, BufferGetBlockNumber(leafbuf));
START_CRIT_SECTION();
/*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 3dbafdd..f39d634 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -21,6 +21,7 @@
#include "access/nbtree.h"
#include "access/relscan.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "pgstat.h"
@@ -297,6 +298,7 @@ btbuildempty(Relation index)
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
(char *) metapage, true);
+
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
BTREE_METAPAGE, metapage, false);
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index bf6c03c..63f50fb 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -69,6 +69,7 @@
#include "access/nbtree.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
@@ -276,8 +277,12 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
/* XLOG stuff */
if (wstate->btws_use_wal)
{
+ /* Don't forget to set ptrack bit even if we're skipping bufmgr stage */
+ ptrack_add_block(wstate->index, blkno);
/* We use the heap NEWPAGE record type for this */
log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
+ /* Ensure rd_smgr is open (could have been closed by relcache flush!) */
+ RelationOpenSmgr(wstate->index);
}
/*
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index d67241c..bfd1564 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -21,6 +21,7 @@
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "storage/procarray.h"
#include "miscadmin.h"
@@ -84,6 +85,11 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
xl_btree_metadata *xlrec;
char *ptr;
Size len;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
metabuf = XLogInitBufferForRedo(record, block_id);
ptr = XLogRecGetBlockData(record, block_id, &len);
@@ -129,6 +135,11 @@ _bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
{
XLogRecPtr lsn = record->EndRecPtr;
Buffer buf;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
{
@@ -152,6 +163,11 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* Insertion to an internal page finishes an incomplete split at the child
@@ -209,11 +225,17 @@ btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
BlockNumber leftsib;
BlockNumber rightsib;
BlockNumber rnext;
+ RelFileNode rnode;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &leftsib);
+ ptrack_add_block_redo(rnode, leftsib);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &rightsib);
+ ptrack_add_block_redo(rnode, rightsib);
- XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
- XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
rnext = P_NONE;
+ else
+ ptrack_add_block_redo(rnode, rnext);
/*
* Clear the incomplete split flag on the left sibling of the child page
@@ -390,6 +412,12 @@ btree_xlog_vacuum(XLogReaderState *record)
Buffer buffer;
Page page;
BTPageOpaque opaque;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+
#ifdef UNUSED
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
@@ -568,6 +596,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
* overkill, but it's safe, and certainly better than panicking here.
*/
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
if (!BufferIsValid(ibuffer))
return InvalidTransactionId;
@@ -593,6 +622,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
*/
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
+ ptrack_add_block_redo(rnode, hblkno);
if (!BufferIsValid(hbuffer))
{
UnlockReleaseBuffer(ibuffer);
@@ -667,6 +697,11 @@ btree_xlog_delete(XLogReaderState *record)
Buffer buffer;
Page page;
BTPageOpaque opaque;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* If we have any conflict processing to do, it must happen before we
@@ -729,6 +764,13 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
Page page;
BTPageOpaque pageop;
IndexTupleData trunctuple;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* In normal operation, we would lock all the pages this WAL record
@@ -812,10 +854,27 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
Buffer buffer;
Page page;
BTPageOpaque pageop;
+ RelFileNode rnode;
+ BlockNumber blkno;
leftsib = xlrec->leftsib;
rightsib = xlrec->rightsib;
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if (leftsib != P_NONE)
+ {
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if (XLogRecHasBlockRef(record, 3))
+ {
+ XLogRecGetBlockTag(record, 3, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
+
/*
* In normal operation, we would lock all the pages this WAL record
* touches before changing any of them. In WAL replay, it should be okay
@@ -926,6 +985,11 @@ btree_xlog_newroot(XLogReaderState *record)
BTPageOpaque pageop;
char *ptr;
Size len;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
buffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(buffer);
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index b0702a7..8b6fd84 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -19,6 +19,7 @@
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
@@ -214,6 +215,9 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
+ ptrack_add_block(index, BufferGetBlockNumber(current->buffer));
+ if (parent->buffer)
+ ptrack_add_block(index, BufferGetBlockNumber(parent->buffer));
START_CRIT_SECTION();
if (current->offnum == InvalidOffsetNumber ||
@@ -458,6 +462,9 @@ moveLeafs(Relation index, SpGistState *state,
leafdata = leafptr = palloc(size);
+ ptrack_add_block(index, BufferGetBlockNumber(current->buffer));
+ ptrack_add_block(index, BufferGetBlockNumber(nbuf));
+ ptrack_add_block(index, BufferGetBlockNumber(parent->buffer));
START_CRIT_SECTION();
/* copy all the old tuples to new page, unless they're dead */
@@ -1110,6 +1117,14 @@ doPickSplit(Relation index, SpGistState *state,
leafdata = leafptr = (char *) palloc(totalLeafSizes);
/* Here we begin making the changes to the target pages */
+ if (current->buffer != InvalidBuffer)
+ ptrack_add_block(index, current->blkno);
+ if (parent->buffer != InvalidBuffer)
+ ptrack_add_block(index, parent->blkno);
+ if (newInnerBuffer != InvalidBuffer)
+ ptrack_add_block(index, BufferGetBlockNumber(newInnerBuffer));
+ if (newLeafBuffer != InvalidBuffer)
+ ptrack_add_block(index, BufferGetBlockNumber(newLeafBuffer));
START_CRIT_SECTION();
/*
@@ -1520,6 +1535,7 @@ spgAddNodeAction(Relation index, SpGistState *state,
/*
* We can replace the inner tuple by new version in-place
*/
+ ptrack_add_block(index, BufferGetBlockNumber(current->buffer));
START_CRIT_SECTION();
PageIndexTupleDelete(current->page, current->offnum);
@@ -1603,6 +1619,9 @@ spgAddNodeAction(Relation index, SpGistState *state,
else
xlrec.parentBlk = 2;
+ ptrack_add_block(index, BufferGetBlockNumber(current->buffer));
+ ptrack_add_block(index, BufferGetBlockNumber(saveCurrent.buffer));
+ ptrack_add_block(index, BufferGetBlockNumber(parent->buffer));
START_CRIT_SECTION();
/* insert new ... */
@@ -1788,6 +1807,9 @@ spgSplitNodeAction(Relation index, SpGistState *state,
&xlrec.newPage);
}
+ if (newBuffer != InvalidBuffer)
+ ptrack_add_block(index, BufferGetBlockNumber(newBuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(current->buffer));
START_CRIT_SECTION();
/*
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c
index e4b2c29..e30217f 100644
--- a/src/backend/access/spgist/spginsert.c
+++ b/src/backend/access/spgist/spginsert.c
@@ -21,6 +21,7 @@
#include "access/spgxlog.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
@@ -91,6 +92,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Assert(BufferGetBlockNumber(rootbuffer) == SPGIST_ROOT_BLKNO);
Assert(BufferGetBlockNumber(nullbuffer) == SPGIST_NULL_BLKNO);
+ ptrack_add_block(index, BufferGetBlockNumber(metabuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(rootbuffer));
+ ptrack_add_block(index, BufferGetBlockNumber(nullbuffer));
START_CRIT_SECTION();
SpGistInitMetapage(BufferGetPage(metabuffer));
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index d7d5e90..b12c491 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -20,6 +20,7 @@
#include "access/spgxlog.h"
#include "access/transam.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/storage_xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
@@ -324,6 +325,7 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
elog(ERROR, "inconsistent counts of deletable tuples");
/* Do the updates */
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
spgPageIndexMultiDelete(&bds->spgstate, page,
@@ -448,6 +450,7 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
return; /* nothing more to do */
/* Do the update */
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/* The tuple numbers are in order, so we can use PageIndexMultiDelete */
@@ -505,6 +508,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
xlrec.nToPlaceholder = 0;
xlrec.newestRedirectXid = InvalidTransactionId;
+ ptrack_add_block(index, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
/*
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 87def79..9ab5ede 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -20,6 +20,7 @@
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "storage/standby.h"
#include "utils/memutils.h"
@@ -78,6 +79,15 @@ spgRedoCreateIndex(XLogReaderState *record)
XLogRecPtr lsn = record->EndRecPtr;
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
buffer = XLogInitBufferForRedo(record, 0);
Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
@@ -115,6 +125,16 @@ spgRedoAddLeaf(XLogReaderState *record)
Buffer buffer;
Page page;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if (xldata->offnumParent != InvalidOffsetNumber)
+ {
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
ptr += sizeof(spgxlogAddLeaf);
leafTuple = ptr;
@@ -215,8 +235,15 @@ spgRedoMoveLeafs(XLogReaderState *record)
Page page;
XLogRedoAction action;
BlockNumber blknoDst;
+ RelFileNode rnode;
+ BlockNumber blkno;
- XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blknoDst);
+ ptrack_add_block_redo(rnode, blknoDst);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
fillFakeState(&state, xldata->stateSrc);
@@ -326,6 +353,21 @@ spgRedoAddNode(XLogReaderState *record)
Buffer buffer;
Page page;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if(XLogRecHasBlockRef(record, 1))
+ {
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
+ if (xldata->parentBlk == 2)
+ {
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
ptr += sizeof(spgxlogAddNode);
innerTuple = ptr;
@@ -494,6 +536,13 @@ spgRedoSplitTuple(XLogReaderState *record)
Buffer buffer;
Page page;
XLogRedoAction action;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ if (XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno)) /* block 1 exists only if a new page was used */
+ ptrack_add_block_redo(rnode, blkno);
ptr += sizeof(spgxlogSplitTuple);
prefixTuple = ptr;
@@ -580,8 +629,23 @@ spgRedoPickSplit(XLogReaderState *record)
int i;
BlockNumber blknoInner;
XLogRedoAction action;
-
- XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 2, &rnode, NULL, &blknoInner);
+ ptrack_add_block_redo(rnode, blknoInner);
+ if (XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno)) /* source page reference may be absent */
+ ptrack_add_block_redo(rnode, blkno);
+ if (XLogRecHasBlockRef(record, 1))
+ {
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
+ if (XLogRecHasBlockRef(record, 3))
+ {
+ XLogRecGetBlockTag(record, 3, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
+ }
fillFakeState(&state, xldata->stateSrc);
@@ -797,6 +861,11 @@ spgRedoVacuumLeaf(XLogReaderState *record)
Buffer buffer;
Page page;
int i;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
fillFakeState(&state, xldata->stateSrc);
@@ -873,6 +942,11 @@ spgRedoVacuumRoot(XLogReaderState *record)
OffsetNumber *toDelete;
Buffer buffer;
Page page;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
toDelete = xldata->offsets;
@@ -898,6 +972,11 @@ spgRedoVacuumRedirect(XLogReaderState *record)
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
OffsetNumber *itemToPlaceholder;
Buffer buffer;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
itemToPlaceholder = xldata->offsets;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a63a885..c2b403d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -36,6 +36,7 @@
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
@@ -9815,6 +9816,11 @@ xlog_redo(XLogReaderState *record)
else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
{
Buffer buffer;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
/*
* Full-page image (FPI) records contain nothing else but a backup
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 3af03ec..38fcaac 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -23,6 +23,7 @@
#include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xloginsert.h"
+#include "access/ptrack.h"
#include "catalog/pg_control.h"
#include "common/pg_lzcompress.h"
#include "miscadmin.h"
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index bbae733..0b07d6d 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -553,8 +553,6 @@ CreateFakeRelcacheEntry(RelFileNode rnode)
FakeRelCacheEntry fakeentry;
Relation rel;
- Assert(InRecovery);
-
/* Allocate the Relation struct and all related space in one block. */
fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
rel = (Relation) fakeentry;
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 9a5fde0..a498c8b 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -238,6 +238,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
+ rel->rd_smgr->smgr_ptrack_nblocks = InvalidBlockNumber;
/* Truncate the FSM first if it exists */
fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index e138539..0becc0d 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -29,6 +29,7 @@
#include "access/xact.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
@@ -626,6 +627,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
* We don't need to copy subdirectories
*/
copydir(srcpath, dstpath, false);
+ create_ptrack_init_file(dstpath);
/* Record the filesystem change in XLOG */
{
@@ -1255,6 +1257,7 @@ movedb(const char *dbname, const char *tblspcname)
* Copy files from the old tablespace to the new one
*/
copydir(src_dbpath, dst_dbpath, false);
+ create_ptrack_init_file(dst_dbpath);
/*
* Record the filesystem change in XLOG
@@ -2117,6 +2120,7 @@ dbase_redo(XLogReaderState *record)
* We don't need to copy subdirectories
*/
copydir(src_path, dst_path, false);
+ create_ptrack_init_file(dst_path);
}
else if (info == XLOG_DBASE_DROP)
{
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 5c2ce78..72b9bf5 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -22,6 +22,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
+#include "access/ptrack.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
@@ -374,6 +375,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
if (RelationNeedsWAL(rel))
GetTopTransactionId();
+ ptrack_add_block(rel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
MarkBufferDirty(buf);
@@ -759,6 +761,7 @@ nextval_internal(Oid relid, bool check_permissions)
GetTopTransactionId();
/* ready to change the on-disk (or really, in-buffer) tuple */
+ ptrack_add_block(seqrel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
/*
@@ -970,6 +973,7 @@ do_setval(Oid relid, int64 next, bool iscalled)
GetTopTransactionId();
/* ready to change the on-disk (or really, in-buffer) tuple */
+ ptrack_add_block(seqrel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
seq->last_value = next; /* last fetched number */
@@ -1882,6 +1886,11 @@ seq_redo(XLogReaderState *record)
Size itemsz;
xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
sequence_magic *sm;
+ RelFileNode rnode;
+ BlockNumber blkno;
+
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ptrack_add_block_redo(rnode, blkno);
if (info != XLOG_SEQ_LOG)
elog(PANIC, "seq_redo: unknown op code %u", info);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 33c99b3..a07d290 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -23,6 +23,7 @@
#include "access/tupconvert.h"
#include "access/xact.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
@@ -10571,6 +10572,13 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
/* copy those extra forks that exist */
for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++)
{
+ /*
+ * Do not copy the ptrack fork: it will be created for the new
+ * relation while its data is being copied.
+ */
+ if (forkNum == PAGESTRACK_FORKNUM)
+ continue;
+
if (smgrexists(rel->rd_smgr, forkNum))
{
smgrcreate(dstrel, forkNum, false);
@@ -10854,7 +10862,18 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
* space.
*/
if (use_wal)
+ {
+ /*
+ * Don't forget to set ptrack bit even if we're skipping bufmgr
+ * stage. The reason to use ptrack_add_block_redo() instead of the
+ * regular ptrack_add_block() function is that we don't have
+ * a Relation structure here.
+ */
+ if (forkNum == MAIN_FORKNUM &&
+ relpersistence == RELPERSISTENCE_PERMANENT)
+ ptrack_add_block_redo(dst->smgr_rnode.node, blkno);
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
+ }
PageSetChecksumInplace(page, blkno);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 30b1c08..559559a 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -44,6 +44,7 @@
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
@@ -861,6 +862,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
empty_pages++;
}
freespace = PageGetHeapFreeSpace(page);
+ ptrack_add_block(onerel, BufferGetBlockNumber(buf));
MarkBufferDirty(buf);
UnlockReleaseBuffer(buf);
@@ -876,6 +878,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
/* empty pages are always all-visible and all-frozen */
if (!PageIsAllVisible(page))
{
+ ptrack_add_block(onerel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
/* mark buffer dirty before writing a WAL record */
@@ -1101,6 +1104,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
*/
if (nfrozen > 0)
{
+ ptrack_add_block(onerel, BufferGetBlockNumber(buf));
START_CRIT_SECTION();
MarkBufferDirty(buf);
@@ -1174,6 +1178,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
* rare cases after a crash, it is not worth optimizing.
*/
PageSetAllVisible(page);
+ ptrack_add_block(onerel, BufferGetBlockNumber(buf));
MarkBufferDirty(buf);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
vmbuffer, visibility_cutoff_xid, flags);
@@ -1213,6 +1218,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
+ ptrack_add_block(onerel, BufferGetBlockNumber(buf));
MarkBufferDirty(buf);
visibilitymap_clear(onerel, blkno, vmbuffer,
VISIBILITYMAP_VALID_BITS);
@@ -1451,6 +1457,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
+ ptrack_add_block(onerel, BufferGetBlockNumber(buffer));
START_CRIT_SECTION();
for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0ca095c..f3feebe 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -170,6 +170,7 @@ smgropen(RelFileNode rnode, BackendId backend)
reln->smgr_targblock = InvalidBlockNumber;
reln->smgr_fsm_nblocks = InvalidBlockNumber;
reln->smgr_vm_nblocks = InvalidBlockNumber;
+ reln->smgr_ptrack_nblocks = InvalidBlockNumber;
reln->smgr_which = 0; /* we only have md.c at present */
/* mark it not open */
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index eb6960d..d643828 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -24,6 +24,7 @@
#include "access/sysattr.h"
#include "access/xact.h"
#include "access/xlog.h"
+#include "access/ptrack.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
@@ -565,6 +566,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
elog(DEBUG3, "InitPostgres");
+ assign_ptrack_enable(ptrack_enable, NULL);
+
/*
* Add my PGPROC struct to the ProcArray.
*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 58a4cf9..d505718 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -33,6 +33,7 @@
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
+#include "access/ptrack.h"
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
#include "commands/async.h"
@@ -1034,6 +1035,16 @@ static struct config_bool ConfigureNamesBool[] =
},
{
+ {"ptrack_enable", PGC_SIGHUP, WAL_SETTINGS,
+ gettext_noop("Enable page tracking."),
+ NULL
+ },
+ &ptrack_enable,
+ false,
+ NULL, &assign_ptrack_enable, NULL
+ },
+
+ {
{"wal_compression", PGC_SUSET, WAL_SETTINGS,
gettext_noop("Compresses full-page writes written in WAL file."),
NULL
diff --git a/src/common/relpath.c b/src/common/relpath.c
index c2f3662..4af0439 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -35,7 +35,8 @@ const char *const forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
"vm", /* VISIBILITYMAP_FORKNUM */
- "init" /* INIT_FORKNUM */
+ "init", /* INIT_FORKNUM */
+ "ptrack" /* PAGESTRACK_FORKNUM */
};
/*
diff --git a/src/include/access/ptrack.h b/src/include/access/ptrack.h
new file mode 100644
index 0000000..389a33c
--- /dev/null
+++ b/src/include/access/ptrack.h
@@ -0,0 +1,30 @@
+#ifndef PTRACK_H
+#define PTRACK_H
+
+#include "access/xlogdefs.h"
+#include "storage/block.h"
+#include "storage/buf.h"
+#include "storage/relfilenode.h"
+#include "utils/relcache.h"
+
+/* Ptrack version as a string */
+#define PTRACK_VERSION "1.4"
+/* Ptrack version as a number */
+#define PTRACK_VERSION_NUM 104
+
+/* Number of bits allocated for each heap block. */
+#define PTRACK_BITS_PER_HEAPBLOCK 1
+/* Presumably the name of the file written by create_ptrack_init_file() -- confirm. */
+#define PTRACK_INIT_FILE "ptrack_init"
+/* Backing variable of the "ptrack_enable" GUC ("Enable page tracking."). */
+extern PGDLLIMPORT bool ptrack_enable;
+/* Hooks called from page-modifying code, WAL redo routines and database copy paths. */
+extern void ptrack_add_block(Relation rel, BlockNumber heapBlk);
+extern void ptrack_add_block_redo(RelFileNode rnode, BlockNumber heapBlk);
+extern void create_ptrack_init_file(char *dest_dir);
+/* Presumably back the pg_ptrack_* SQL functions (confirm); assign_ptrack_enable is the GUC assign hook. */
+extern void ptrack_clear(void);
+extern bytea *ptrack_get_and_clear(Oid tablespace_oid, Oid table_oid);
+extern void assign_ptrack_enable(bool newval, void *extra);
+
+#endif /* PTRACK_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 8b33b4e..11d9d7e 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5475,6 +5475,18 @@ DESCR("list files in the log directory");
DATA(insert OID = 3354 ( pg_ls_waldir PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_waldir _null_ _null_ _null_ ));
DESCR("list of files in the WAL directory");
+/* ptrack-related functions */
+DATA(insert OID = 6016 ( pg_ptrack_clear PGNSP PGUID 12 1 0 0 0 f f f f t f v u 0 0 2278 "" _null_ _null_ _null_ _null_ _null_ pg_ptrack_clear _null_ _null_ _null_ ));
+DESCR("clear ptrack fork files");
+DATA(insert OID = 6018 ( pg_ptrack_get_and_clear PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 17 "26 26" _null_ _null_ _null_ _null_ _null_ pg_ptrack_get_and_clear _null_ _null_ _null_ ));
+DESCR("get ptrack file as bytea and clear it");
+DATA(insert OID = 6022 ( pg_ptrack_get_and_clear_db PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 16 "26 26" _null_ _null_ _null_ _null_ _null_ pg_ptrack_get_and_clear_db _null_ _null_ _null_ ));
+DESCR("check if ptrack_init_file exists in the given database");
+DATA(insert OID = 6021 ( ptrack_version PGNSP PGUID 12 1 0 0 0 f f f f t f s s 0 0 25 "" _null_ _null_ _null_ _null_ _null_ ptrack_version _null_ _null_ _null_ ));
+DESCR("Ptrack version string");
+DATA(insert OID = 6023 ( pg_ptrack_control_lsn PGNSP PGUID 12 1 0 0 0 f f f f t f s s 0 0 3220 "" _null_ _null_ _null_ _null_ _null_ pg_ptrack_control_lsn _null_ _null_ _null_ ));
+DESCR("read LSN from ptrack_control file");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h
index ec5ef99..919e02e 100644
--- a/src/include/common/relpath.h
+++ b/src/include/common/relpath.h
@@ -27,7 +27,8 @@ typedef enum ForkNumber
MAIN_FORKNUM = 0,
FSM_FORKNUM,
VISIBILITYMAP_FORKNUM,
- INIT_FORKNUM
+ INIT_FORKNUM,
+ PAGESTRACK_FORKNUM
/*
* NOTE: if you add a new fork, change MAX_FORKNUM and possibly
@@ -36,9 +37,9 @@ typedef enum ForkNumber
*/
} ForkNumber;
-#define MAX_FORKNUM INIT_FORKNUM
+#define MAX_FORKNUM PAGESTRACK_FORKNUM
-#define FORKNAMECHARS 4 /* max chars for a fork name */
+#define FORKNAMECHARS 6 /* max chars for a fork name ("ptrack") */
extern const char *const forkNames[];
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 2279134..754edf0 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -46,15 +46,16 @@ typedef struct SMgrRelationData
struct SMgrRelationData **smgr_owner;
/*
- * These next three fields are not actually used or manipulated by smgr,
+ * These next four fields are not actually used or manipulated by smgr,
* except that they are reset to InvalidBlockNumber upon a cache flush
* event (in particular, upon truncation of the relation). Higher levels
* store cached state here so that it will be reset when truncation
- * happens. In all three cases, InvalidBlockNumber means "unknown".
+ * happens. In all four cases, InvalidBlockNumber means "unknown".
*/
BlockNumber smgr_targblock; /* current insertion target block */
BlockNumber smgr_fsm_nblocks; /* last known size of fsm fork */
BlockNumber smgr_vm_nblocks; /* last known size of vm fork */
+ BlockNumber smgr_ptrack_nblocks; /* last known size of ptrack fork */
/* additional public fields may someday exist here */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment