Created
June 22, 2020 11:11
-
-
Save danielgustafsson/4d0d6cefe5acc420adcd3d79c09d2123 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml | |
index 5a66115df1..854eb283cd 100644 | |
--- a/doc/src/sgml/catalogs.sgml | |
+++ b/doc/src/sgml/catalogs.sgml | |
@@ -2157,6 +2157,17 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l | |
</para></entry> | |
</row> | |
+ <row> | |
+ <entry role="catalog_table_entry"><para role="column_definition"> | |
+ <structfield>relhaschecksums</structfield> <type>bool</type> | |
+ </para> | |
+ <para> | |
+ True if relation has data checksums on all pages. This state is only | |
+ used during checksum processing; this field should never be consulted | |
+ for cluster checksum status. | |
+ </para></entry> | |
+ </row> | |
+ | |
<row> | |
<entry role="catalog_table_entry"><para role="column_definition"> | |
<structfield>relrewrite</structfield> <type>oid</type> | |
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml | |
index b7c450ea29..7dc8d9c21d 100644 | |
--- a/doc/src/sgml/func.sgml | |
+++ b/doc/src/sgml/func.sgml | |
@@ -25077,6 +25077,71 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); | |
</sect2> | |
+ <sect2 id="functions-admin-checksum"> | |
+ <title>Data Checksum Functions</title> | |
+ | |
+ <para> | |
+ The functions shown in <xref linkend="functions-checksums-table" /> can | |
+ be used to enable or disable data checksums in a running cluster. | |
+ See <xref linkend="checksums" /> for details. | |
+ </para> | |
+ | |
+ <table id="functions-checksums-table"> | |
+ <title>Checksum <acronym>SQL</acronym> Functions</title> | |
+ <tgroup cols="3"> | |
+ <thead> | |
+ <row> | |
+ <entry>Function</entry> | |
+ <entry>Return Type</entry> | |
+ <entry>Description</entry> | |
+ </row> | |
+ </thead> | |
+ <tbody> | |
+ <row> | |
+ <entry> | |
+ <indexterm> | |
+ <primary>pg_enable_data_checksums</primary> | |
+ </indexterm> | |
+ <literal><function>pg_enable_data_checksums(<optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional>)</function></literal> | |
+ </entry> | |
+ <entry> | |
+ void | |
+ </entry> | |
+ <entry> | |
+ <para> | |
+ Initiates data checksums for the cluster. This will switch the data checksums mode | |
+ to <literal>inprogress</literal> as well as start a background worker that will process | |
+ all data in the database and enable checksums for it. When all data pages have had | |
+ checksums enabled, the cluster will automatically switch data checksums mode to | |
+ <literal>on</literal>. | |
+ </para> | |
+ <para> | |
+ If <parameter>cost_delay</parameter> and <parameter>cost_limit</parameter> are | |
+ specified, the speed of the process is throttled using the same principles as | |
+ <link linkend="runtime-config-resource-vacuum-cost">Cost-based Vacuum Delay</link>. | |
+ </para> | |
+ </entry> | |
+ </row> | |
+ <row> | |
+ <entry> | |
+ <indexterm> | |
+ <primary>pg_disable_data_checksums</primary> | |
+ </indexterm> | |
+ <literal><function>pg_disable_data_checksums()</function></literal> | |
+ </entry> | |
+ <entry> | |
+ void | |
+ </entry> | |
+ <entry> | |
+ Disables data checksums for the cluster. | |
+ </entry> | |
+ </row> | |
+ </tbody> | |
+ </tgroup> | |
+ </table> | |
+ | |
+ </sect2> | |
+ | |
<sect2 id="functions-admin-dbobject"> | |
<title>Database Object Management Functions</title> | |
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml | |
index 1635fcb1fd..365e4acb69 100644 | |
--- a/doc/src/sgml/ref/initdb.sgml | |
+++ b/doc/src/sgml/ref/initdb.sgml | |
@@ -219,6 +219,7 @@ PostgreSQL documentation | |
failures will be reported in the | |
<link linkend="monitoring-pg-stat-database-view"> | |
<structname>pg_stat_database</structname></link> view. | |
+ See <xref linkend="checksums" /> for details. | |
</para> | |
</listitem> | |
</varlistentry> | |
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml | |
index bd9fae544c..fdc0fc2080 100644 | |
--- a/doc/src/sgml/wal.sgml | |
+++ b/doc/src/sgml/wal.sgml | |
@@ -230,6 +230,103 @@ | |
</para> | |
</sect1> | |
+ <sect1 id="checksums"> | |
+ <title>Data Checksums</title> | |
+ <indexterm> | |
+ <primary>checksums</primary> | |
+ </indexterm> | |
+ | |
+ <para> | |
+ Data pages are not checksum protected by default, but this can optionally be | |
+ enabled for a cluster. When enabled, each data page will be assigned a | |
+ checksum that is updated when the page is written and verified every time | |
+ the page is read. Only data pages are protected by checksums, internal data | |
+ structures and temporary files are not. | |
+ </para> | |
+ | |
+ <para> | |
+ Checksums are normally enabled when the cluster is initialized using <link | |
+ linkend="app-initdb-data-checksums"><application>initdb</application></link>. | |
+ They can also be enabled or disabled at a later time, either as an offline | |
+ operation or in a running cluster. In all cases, checksums are enabled or | |
+ disabled at the full cluster level, and cannot be specified individually for | |
+ databases or tables. | |
+ </para> | |
+ | |
+ <para> | |
+ The current state of checksums in the cluster can be verified by viewing the | |
+ value of the read-only configuration variable <xref | |
+ linkend="guc-data-checksums" /> by issuing the command <command>SHOW | |
+ data_checksums</command>. | |
+ </para> | |
+ | |
+ <para> | |
+ When attempting to recover from corrupt data it may be necessary to bypass | |
+ the checksum protection in order to recover data. To do this, temporarily | |
+ set the configuration parameter <xref linkend="guc-ignore-checksum-failure" />. | |
+ </para> | |
+ | |
+ <sect2 id="checksums-enable-disable"> | |
+ <title>On-line Enabling of Checksums</title> | |
+ | |
+ <para> | |
+ Checksums can be enabled or disabled online, by calling the appropriate | |
+ <link linkend="functions-admin-checksum">functions</link>. | |
+ Disabling of checksums takes effect immediately when the function is called. | |
+ </para> | |
+ | |
+ <para> | |
+ Enabling checksums will put the cluster checksum mode in | |
+ <literal>inprogress</literal> mode. During this time, checksums will be | |
+ written but not verified. In addition to this, a background worker process | |
+ is started that enables checksums on all existing data in the cluster. Once | |
+ this worker has completed processing all databases in the cluster, the | |
+ checksum mode will automatically switch to <literal>on</literal>. The | |
+ processing will consume a background worker process; make sure that | |
+ <varname>max_worker_processes</varname> allows for at least one | |
+ additional process. | |
+ </para> | |
+ | |
+ <para> | |
+ The process will initially wait for all open transactions to finish before | |
+ it starts, so that it can be certain that there are no tables that have been | |
+ created inside a transaction that has not committed yet and thus would not | |
+ be visible to the process enabling checksums. It will also, for each database, | |
+ wait for all pre-existing temporary tables to get removed before it finishes. | |
+ If long-lived temporary tables are used in the application it may be necessary | |
+ to terminate these application connections to allow the process to complete. | |
+ </para> | |
+ | |
+ <para> | |
+ If the cluster is stopped while in <literal>inprogress</literal> mode, for | |
+ any reason, then this process must be restarted manually. To do this, | |
+ re-execute the function <function>pg_enable_data_checksums()</function> | |
+ once the cluster has been restarted. The background worker will attempt | |
+ to resume the work from where it was interrupted. | |
+ </para> | |
+ | |
+ <note> | |
+ <para> | |
+ Enabling checksums can cause significant I/O to the system, as most of the | |
+ database pages will need to be rewritten, and will be written both to the | |
+ data files and the WAL. | |
+ </para> | |
+ </note> | |
+ | |
+ </sect2> | |
+ | |
+ <sect2 id="checksums-offline-enable-disable"> | |
+ <title>Off-line Enabling of Checksums</title> | |
+ | |
+ <para> | |
+ The <link linkend="pg_checksums"><application>pg_checksums</application></link> | |
+ application can be used to enable or disable data checksums, as well as | |
+ verify checksums, on an offline cluster. | |
+ </para> | |
+ | |
+ </sect2> | |
+ </sect1> | |
+ | |
<sect1 id="wal-intro"> | |
<title>Write-Ahead Logging (<acronym>WAL</acronym>)</title> | |
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c | |
index 1cd97852e8..f5b75a843d 100644 | |
--- a/src/backend/access/rmgrdesc/xlogdesc.c | |
+++ b/src/backend/access/rmgrdesc/xlogdesc.c | |
@@ -18,6 +18,7 @@ | |
#include "access/xlog.h" | |
#include "access/xlog_internal.h" | |
#include "catalog/pg_control.h" | |
+#include "storage/bufpage.h" | |
#include "utils/guc.h" | |
#include "utils/timestamp.h" | |
@@ -140,6 +141,18 @@ xlog_desc(StringInfo buf, XLogReaderState *record) | |
xlrec.ThisTimeLineID, xlrec.PrevTimeLineID, | |
timestamptz_to_str(xlrec.end_time)); | |
} | |
+ else if (info == XLOG_CHECKSUMS) | |
+ { | |
+ xl_checksum_state xlrec; | |
+ | |
+ memcpy(&xlrec, rec, sizeof(xl_checksum_state)); | |
+ if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_VERSION) | |
+ appendStringInfo(buf, "on"); | |
+ else if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) | |
+ appendStringInfo(buf, "inprogress"); | |
+ else | |
+ appendStringInfo(buf, "off"); | |
+ } | |
} | |
const char * | |
@@ -185,6 +198,9 @@ xlog_identify(uint8 info) | |
case XLOG_FPI_FOR_HINT: | |
id = "FPI_FOR_HINT"; | |
break; | |
+ case XLOG_CHECKSUMS: | |
+ id = "CHECKSUMS"; | |
+ break; | |
} | |
return id; | |
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c | |
index a1256a103b..8ca8aa4f4a 100644 | |
--- a/src/backend/access/transam/xlog.c | |
+++ b/src/backend/access/transam/xlog.c | |
@@ -38,6 +38,7 @@ | |
#include "access/xlogreader.h" | |
#include "access/xlogutils.h" | |
#include "catalog/catversion.h" | |
+#include "catalog/pg_class.h" | |
#include "catalog/pg_control.h" | |
#include "catalog/pg_database.h" | |
#include "commands/progress.h" | |
@@ -251,6 +252,11 @@ static bool LocalPromoteIsTriggered = false; | |
*/ | |
static int LocalXLogInsertAllowed = -1; | |
+/* | |
+ * Local state for Controlfile data_checksum_version | |
+ */ | |
+static uint32 LocalDataChecksumVersion = 0; | |
+ | |
/* | |
* When ArchiveRecoveryRequested is set, archive recovery was requested, | |
* ie. signal files were present. When InArchiveRecovery is set, we are | |
@@ -892,6 +898,7 @@ static void SetLatestXTime(TimestampTz xtime); | |
static void SetCurrentChunkStartTime(TimestampTz xtime); | |
static void CheckRequiredParameterValues(void); | |
static void XLogReportParameters(void); | |
+static void XlogChecksums(ChecksumType new_type); | |
static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, | |
TimeLineID prevTLI); | |
static void LocalSetXLogInsertAllowed(void); | |
@@ -1077,7 +1084,7 @@ XLogInsertRecord(XLogRecData *rdata, | |
Assert(RedoRecPtr < Insert->RedoRecPtr); | |
RedoRecPtr = Insert->RedoRecPtr; | |
} | |
- doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); | |
+ doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites || DataChecksumsOnInProgress()); | |
if (doPageWrites && | |
(!prevDoPageWrites || | |
@@ -4888,9 +4895,7 @@ ReadControlFile(void) | |
CalculateCheckpointSegments(); | |
- /* Make the initdb settings visible as GUC variables, too */ | |
- SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no", | |
- PGC_INTERNAL, PGC_S_OVERRIDE); | |
+ LocalDataChecksumVersion = ControlFile->data_checksum_version; | |
} | |
/* | |
@@ -4927,10 +4932,116 @@ GetMockAuthenticationNonce(void) | |
* Are checksums enabled for data pages? | |
*/ | |
bool | |
-DataChecksumsEnabled(void) | |
+DataChecksumsNeedWrite(void) | |
+{ | |
+ return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION || | |
+ LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); | |
+} | |
+ | |
+bool | |
+DataChecksumsNeedVerify(void) | |
+{ | |
+ /* | |
+ * Only verify checksums if they are fully enabled in the cluster. In | |
+ * inprogress state they are only updated, not verified. | |
+ */ | |
+ return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION); | |
+} | |
+ | |
+bool | |
+DataChecksumsOnInProgress(void) | |
+{ | |
+ return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); | |
+} | |
+ | |
+void | |
+SetDataChecksumsOnInProgress(void) | |
+{ | |
+ Assert(ControlFile != NULL); | |
+ | |
+ if (LocalDataChecksumVersion > 0) | |
+ return; | |
+ | |
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); | |
+ ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; | |
+ UpdateControlFile(); | |
+ LWLockRelease(ControlFileLock); | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON)); | |
+ | |
+ XlogChecksums(PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); | |
+} | |
+ | |
+void | |
+AbsorbChecksumsOnInProgressBarrier(void) | |
+{ | |
+ Assert(LocalDataChecksumVersion == 0 || LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); | |
+ LocalDataChecksumVersion = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; | |
+} | |
+ | |
+void | |
+SetDataChecksumsOn(void) | |
{ | |
Assert(ControlFile != NULL); | |
- return (ControlFile->data_checksum_version > 0); | |
+ | |
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); | |
+ | |
+ if (ControlFile->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) | |
+ { | |
+ LWLockRelease(ControlFileLock); | |
+ elog(ERROR, "checksums not in inprogress mode"); | |
+ } | |
+ | |
+ ControlFile->data_checksum_version = PG_DATA_CHECKSUM_VERSION; | |
+ UpdateControlFile(); | |
+ LWLockRelease(ControlFileLock); | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON)); | |
+ | |
+ XlogChecksums(PG_DATA_CHECKSUM_VERSION); | |
+} | |
+ | |
+void | |
+AbsorbChecksumsOnBarrier(void) | |
+{ | |
+ Assert(LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); | |
+ LocalDataChecksumVersion = PG_DATA_CHECKSUM_VERSION; | |
+} | |
+ | |
+void | |
+SetDataChecksumsOff(void) | |
+{ | |
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); | |
+ | |
+ ControlFile->data_checksum_version = 0; | |
+ XlogChecksums(0); | |
+ UpdateControlFile(); | |
+ LWLockRelease(ControlFileLock); | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF)); | |
+} | |
+ | |
+void | |
+AbsorbChecksumsOffBarrier(void) | |
+{ | |
+ LocalDataChecksumVersion = 0; | |
+} | |
+ | |
+void | |
+InitLocalControldata(void) | |
+{ | |
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); | |
+ LocalDataChecksumVersion = ControlFile->data_checksum_version; | |
+ LWLockRelease(ControlFileLock); | |
+} | |
+ | |
+/* guc hook */ | |
+const char * | |
+show_data_checksums(void) | |
+{ | |
+ if (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION) | |
+ return "on"; | |
+ else if (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) | |
+ return "inprogress"; | |
+ else | |
+ return "off"; | |
} | |
/* | |
@@ -7916,6 +8027,18 @@ StartupXLOG(void) | |
*/ | |
CompleteCommitTsInitialization(); | |
+ /* | |
+ * If we reach this point with checksums in an inprogress state (either | |
+ * being enabled or being disabled), we notify the user that they need to | |
+ * manually restart the process to enable checksums. This is because we | |
+ * cannot launch a dynamic background worker directly from here, it has to | |
+ * be launched from a regular backend. | |
+ */ | |
+ if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) | |
+ ereport(WARNING, | |
+ (errmsg("data checksums are being enabled, but no worker is running"), | |
+ errhint("Either disable or enable data checksums by calling the pg_disable_data_checksums() or pg_enable_data_checksums() functions."))); | |
+ | |
/* | |
* All done with end-of-recovery actions. | |
* | |
@@ -9759,6 +9882,24 @@ XLogReportParameters(void) | |
} | |
} | |
+/* | |
+ * Log the new state of checksums | |
+ */ | |
+static void | |
+XlogChecksums(ChecksumType new_type) | |
+{ | |
+ xl_checksum_state xlrec; | |
+ XLogRecPtr recptr; | |
+ | |
+ xlrec.new_checksumtype = new_type; | |
+ | |
+ XLogBeginInsert(); | |
+ XLogRegisterData((char *) &xlrec, sizeof(xl_checksum_state)); | |
+ | |
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS); | |
+ XLogFlush(recptr); | |
+} | |
+ | |
/* | |
* Update full_page_writes in shared memory, and write an | |
* XLOG_FPW_CHANGE record if necessary. | |
@@ -10214,6 +10355,26 @@ xlog_redo(XLogReaderState *record) | |
/* Keep track of full_page_writes */ | |
lastFullPageWrites = fpw; | |
} | |
+ else if (info == XLOG_CHECKSUMS) | |
+ { | |
+ xl_checksum_state state; | |
+ | |
+ memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state)); | |
+ | |
+ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); | |
+ ControlFile->data_checksum_version = state.new_checksumtype; | |
+ UpdateControlFile(); | |
+ LWLockRelease(ControlFileLock); | |
+ if (state.new_checksumtype == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON)); | |
+ else if (state.new_checksumtype == PG_DATA_CHECKSUM_VERSION) | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON)); | |
+ else | |
+ { | |
+ Assert(state.new_checksumtype == 0); | |
+ WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF)); | |
+ } | |
+ } | |
} | |
#ifdef WAL_DEBUG | |
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c | |
index 290658b22c..6c7b674f90 100644 | |
--- a/src/backend/access/transam/xlogfuncs.c | |
+++ b/src/backend/access/transam/xlogfuncs.c | |
@@ -25,6 +25,7 @@ | |
#include "catalog/pg_type.h" | |
#include "funcapi.h" | |
#include "miscadmin.h" | |
+#include "postmaster/datachecksumsworker.h" | |
#include "pgstat.h" | |
#include "replication/walreceiver.h" | |
#include "storage/fd.h" | |
@@ -784,3 +785,101 @@ pg_promote(PG_FUNCTION_ARGS) | |
(errmsg("server did not promote within %d seconds", wait_seconds))); | |
PG_RETURN_BOOL(false); | |
} | |
+ | |
+/* | |
+ * Disables checksums for the cluster, unless already disabled. | |
+ * | |
+ * Has immediate effect - the checksums are set to off right away. | |
+ */ | |
+Datum | |
+disable_data_checksums(PG_FUNCTION_ARGS) | |
+{ | |
+ /* | |
+ * If we don't need to write new checksums, then clearly they are already | |
+ * disabled. TODO: it could be argued that this should be a NOTICE, LOG | |
+ * or perhaps even an error; or maybe nothing at all with a silent return. | |
+ * For now we LOG and return, but this needs to be revisited. | |
+ */ | |
+ if (!DataChecksumsNeedWrite()) | |
+ { | |
+ ereport(LOG, | |
+ (errmsg("data checksums already disabled"))); | |
+ PG_RETURN_VOID(); | |
+ } | |
+ | |
+ /* | |
+ * Shutting down a concurrently running datachecksumworker will not block | |
+ * awaiting shutdown, but we can continue turning off checksums anyway | |
+ * since it will at most finish the block it had already started and then | |
+ * abort. | |
+ */ | |
+ ShutdownDatachecksumsWorkerIfRunning(); | |
+ | |
+ SetDataChecksumsOff(); | |
+ | |
+ PG_RETURN_VOID(); | |
+} | |
+ | |
+/* | |
+ * Enables checksums for the cluster, unless already enabled. | |
+ * | |
+ * Supports vacuum-like cost-based throttling, to limit system load. | |
+ * Starts a background worker that updates checksums on existing data. | |
+ */ | |
+Datum | |
+enable_data_checksums(PG_FUNCTION_ARGS) | |
+{ | |
+ int cost_delay = PG_GETARG_INT32(0); | |
+ int cost_limit = PG_GETARG_INT32(1); | |
+ | |
+ if (cost_delay < 0) | |
+ ereport(ERROR, (errmsg("cost delay cannot be less than zero"))); | |
+ if (cost_limit <= 0) | |
+ ereport(ERROR, (errmsg("cost limit must be a positive value"))); | |
+ | |
+ if (DataChecksumWorkerStarted()) | |
+ { | |
+ ereport(NOTICE, | |
+ (errmsg("data checksum worker already running"), | |
+ errhint("Retry the operation later to allow time for the worker to finish."))); | |
+ PG_RETURN_VOID(); | |
+ } | |
+ | |
+ /* | |
+ * data checksums on -> on is not a valid state transition as there is | |
+ * nothing to do, but it's debatable whether it should be an ERROR, a | |
+ * LOG/NOTICE or just returning VOID silently. Figuring this out is a TODO | |
+ * much like for the inverse case of disabling disabled checksums. | |
+ */ | |
+ if (DataChecksumsNeedVerify()) | |
+ { | |
+ ereport(NOTICE, | |
+ (errmsg("data checksums already enabled"))); | |
+ } | |
+ /* | |
+ * If the state is set to inprogress but the worker isn't running, then | |
+ * the data checksumming was prematurely terminated. Attempt to continue | |
+ * processing data pages where we left off based on state stored in the | |
+ * catalog. | |
+ */ | |
+ else if (DataChecksumsOnInProgress()) | |
+ { | |
+ ereport(LOG, | |
+ (errmsg("data checksums partly enabled, continuing processing"))); | |
+ | |
+ StartDatachecksumsWorkerLauncher(ENABLE_CHECKSUMS, cost_delay, cost_limit); | |
+ } | |
+ /* | |
+ * We are starting a checksumming process from scratch, and need to start by | |
+ * clearing the state in pg_class in case checksums have ever been enabled | |
+ * before (either fully or partly). As soon as we set the checksum state | |
+ * to inprogress new relations will set relhaschecksums in pg_class so it | |
+ * must be done first. | |
+ */ | |
+ else | |
+ { | |
+ StartDatachecksumsWorkerLauncher(RESET_STATE, cost_delay, cost_limit); | |
+ } | |
+ | |
+ PG_RETURN_VOID(); | |
+} | |
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c | |
index 9c45544815..07d70aa9b6 100644 | |
--- a/src/backend/catalog/heap.c | |
+++ b/src/backend/catalog/heap.c | |
@@ -921,6 +921,7 @@ InsertPgClassTuple(Relation pg_class_desc, | |
values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated); | |
values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident); | |
values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition); | |
+ values[Anum_pg_class_relhaschecksums - 1] = BoolGetDatum(DataChecksumsNeedWrite()); | |
values[Anum_pg_class_relrewrite - 1] = ObjectIdGetDatum(rd_rel->relrewrite); | |
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid); | |
values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid); | |
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql | |
index 5314e9348f..f9745cc09c 100644 | |
--- a/src/backend/catalog/system_views.sql | |
+++ b/src/backend/catalog/system_views.sql | |
@@ -1222,6 +1222,11 @@ CREATE OR REPLACE FUNCTION | |
RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote' | |
PARALLEL SAFE; | |
+CREATE OR REPLACE FUNCTION pg_enable_data_checksums ( | |
+ cost_delay int DEFAULT 0, cost_limit int DEFAULT 100) | |
+ RETURNS void STRICT VOLATILE LANGUAGE internal AS 'enable_data_checksums' | |
+ PARALLEL RESTRICTED; | |
+ | |
-- legacy definition for compatibility with 9.3 | |
CREATE OR REPLACE FUNCTION | |
json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) | |
diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile | |
index bfdf6a833d..59b82ee9ce 100644 | |
--- a/src/backend/postmaster/Makefile | |
+++ b/src/backend/postmaster/Makefile | |
@@ -17,6 +17,7 @@ OBJS = \ | |
bgworker.o \ | |
bgwriter.o \ | |
checkpointer.o \ | |
+ datachecksumsworker.o \ | |
fork_process.o \ | |
interrupt.o \ | |
pgarch.o \ | |
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c | |
index beb5e85434..2212b19b86 100644 | |
--- a/src/backend/postmaster/bgworker.c | |
+++ b/src/backend/postmaster/bgworker.c | |
@@ -18,6 +18,7 @@ | |
#include "pgstat.h" | |
#include "port/atomics.h" | |
#include "postmaster/bgworker_internals.h" | |
+#include "postmaster/datachecksumsworker.h" | |
#include "postmaster/interrupt.h" | |
#include "postmaster/postmaster.h" | |
#include "replication/logicallauncher.h" | |
@@ -128,6 +129,15 @@ static const struct | |
}, | |
{ | |
"ApplyWorkerMain", ApplyWorkerMain | |
+ }, | |
+ { | |
+ "DatachecksumsWorkerLauncherMain", DatachecksumsWorkerLauncherMain | |
+ }, | |
+ { | |
+ "DatachecksumsWorkerMain", DatachecksumsWorkerMain | |
+ }, | |
+ { | |
+ "ResetDataChecksumStateInDatabase", ResetDataChecksumStateInDatabase | |
} | |
}; | |
diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c | |
new file mode 100644 | |
index 0000000000..e40b6b76fd | |
--- /dev/null | |
+++ b/src/backend/postmaster/datachecksumsworker.c | |
@@ -0,0 +1,1196 @@ | |
+/*------------------------------------------------------------------------- | |
+ * | |
+ * datachecksumsworker.c | |
+ * Background worker for enabling or disabling data checksums online | |
+ * | |
+ * When enabling data checksums on a database at initdb time or with | |
+ * pg_checksums, no extra process is required as each page is checksummed, and | |
+ * verified, when accessed. When enabling checksums on an already running | |
+ * cluster, which was not initialized with checksums, this worker will ensure | |
+ * that all pages are checksummed before verification of the checksums is | |
+ * turned on. In the case of disabling checksums, the state transition is | |
+ * recorded in the catalog and controlfile, no changes are performed | |
+ * on the data pages or in the catalog. | |
+ * | |
+ * Checksums can be either enabled or disabled clusterwide, with on/off being | |
+ * the endstate for data_checksums. | 
+ * | |
+ * Enabling checksums | |
+ * ------------------ | |
+ * When enabling checksums in an online cluster, data_checksums will be set to | 
+ * inprogress which signals that write operations MUST compute and write | |
+ * the checksum on the data page, but during reading the checksum SHALL NOT be | |
+ * verified. This ensures that all objects created during checksumming will | |
+ * have checksums set, but no reads will fail due to incorrect checksum. The | |
+ * DataChecksumsWorker will compile a list of databases which exists at the | |
+ * start of checksumming, and all of these which haven't been dropped during | 
+ * the processing MUST have been processed successfully in order for checksums | |
+ * to be enabled. Any new relation created during processing will see the | |
+ * inprogress state and will automatically be checksummed as well as have its | |
+ * state recorded in the catalog to avoid the datachecksumsworker having to | 
+ * process it when already checksummed. | |
+ * | |
+ * For each database, all relations which have storage are read and every data | |
+ * page is marked dirty to force a write with the checksum, this will generate | |
+ * a lot of WAL as the entire database is read and written. Once all datapages | |
+ * in a relation have been written, pg_class.relhaschecksums is set to true to | |
+ * indicate that the relation is done. | |
+ * | |
+ * If the processing is interrupted by a cluster restart, it will be restarted | |
+ * from where it left off given that pg_class.relhaschecksums track state of | |
+ * processed relations and the inprogress state will ensure all new writes | |
+ * are performed with checksums. Each database will be reprocessed, but relations | 
+ * where pg_class.relhaschecksums is true are skipped. | |
+ * | |
+ * In case checksums have been enabled and later disabled, when re-enabling | |
+ * pg_class.relhaschecksums will be reset to false before entering inprogress | |
+ * mode to ensure that all relations are re-processed. | |
+ * | |
+ * | |
+ * Disabling checksums | |
+ * ------------------- | |
+ * Disabling checksums is done as an immediate operation as it only updates | |
+ * the controlfile and accompanying local state in the backends. No changes | |
+ * to pg_class.relhaschecksums are performed as it only tracks state during | 
+ * enabling. | |
+ * | |
+ * | |
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group | |
+ * Portions Copyright (c) 1994, Regents of the University of California | |
+ * | |
+ * | |
+ * IDENTIFICATION | |
+ * src/backend/postmaster/datachecksumsworker.c | |
+ * | |
+ *------------------------------------------------------------------------- | |
+ */ | |
+#include "postgres.h" | |
+ | |
+#include "access/genam.h" | |
+#include "access/heapam.h" | |
+#include "access/htup_details.h" | |
+#include "access/xact.h" | |
+#include "catalog/indexing.h" | |
+#include "catalog/pg_class.h" | |
+#include "catalog/pg_database.h" | |
+#include "commands/vacuum.h" | |
+#include "common/relpath.h" | |
+#include "miscadmin.h" | |
+#include "pgstat.h" | |
+#include "postmaster/bgworker.h" | |
+#include "postmaster/bgwriter.h" | |
+#include "postmaster/datachecksumsworker.h" | |
+#include "storage/bufmgr.h" | |
+#include "storage/checksum.h" | |
+#include "storage/lmgr.h" | |
+#include "storage/ipc.h" | |
+#include "storage/procarray.h" | |
+#include "storage/smgr.h" | |
+#include "tcop/tcopprot.h" | |
+#include "utils/fmgroids.h" | |
+#include "utils/lsyscache.h" | |
+#include "utils/ps_status.h" | |
+#include "utils/syscache.h" | |
+ | |
+#define DATACHECKSUMSWORKER_MAX_DB_RETRIES 5 | |
+ | |
+typedef enum | |
+{ | |
+ DATACHECKSUMSWORKER_SUCCESSFUL = 0, | |
+ DATACHECKSUMSWORKER_ABORTED, | |
+ DATACHECKSUMSWORKER_FAILED, | |
+ DATACHECKSUMSWORKER_RETRYDB, | |
+} DatachecksumsWorkerResult; | |
+ | |
+typedef struct DatachecksumsWorkerShmemStruct | |
+{ | |
+ /* | |
+ * Access to launcher_started and abort must be protected by | |
+ * DatachecksumsWorkerLock. | |
+ */ | |
+ bool launcher_started; | |
+ bool abort; | |
+ | |
+ /* | |
+ * Access to other members can be done without a lock, as while they are | |
+ * in shared memory, they are never concurrently accessed. When a worker | |
+ * is running, the launcher is only waiting for that worker to finish. | |
+ */ | |
+ DatachecksumsWorkerResult success; | |
+ bool process_shared_catalogs; | |
+ /* Parameter values set on start */ | |
+ int cost_delay; | |
+ int cost_limit; | |
+ DataChecksumOperation operation; | |
+} DatachecksumsWorkerShmemStruct; | |
+ | |
+/* Shared memory segment for datachecksumsworker */ | |
+static DatachecksumsWorkerShmemStruct * DatachecksumsWorkerShmem; | |
+ | |
+/* Bookkeeping for work to do */ | |
+typedef struct DatachecksumsWorkerDatabase | |
+{ | |
+ Oid dboid; | |
+ char *dbname; | |
+} DatachecksumsWorkerDatabase; | |
+ | |
+typedef struct DatachecksumsWorkerRelation | |
+{ | |
+ Oid reloid; | |
+ char relkind; | |
+} DatachecksumsWorkerRelation; | |
+ | |
+typedef struct DatachecksumsWorkerResultEntry | |
+{ | |
+ Oid dboid; | |
+ DatachecksumsWorkerResult result; | |
+ int retries; | |
+} DatachecksumsWorkerResultEntry; | |
+ | |
+ | |
+/* Prototypes */ | |
+static List *BuildDatabaseList(void); | |
+static List *BuildRelationList(bool include_shared); | |
+static List *BuildTempTableList(void); | |
+static DatachecksumsWorkerResult ProcessDatabase(DatachecksumsWorkerDatabase * db); | |
+static bool ProcessAllDatabases(bool already_connected); | |
+static void launcher_cancel_handler(SIGNAL_ARGS); | |
+static void SetRelHasChecksums(Oid relOid); | |
+ | |
+bool | |
+DataChecksumWorkerStarted(void) | |
+{ | |
+ bool started = false; | |
+ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ if (DatachecksumsWorkerShmem->launcher_started && !DatachecksumsWorkerShmem->abort) | |
+ started = true; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ | |
+ return started; | |
+} | |
+ | |
+/* | |
+ * Main entry point for datachecksumsworker launcher process. | |
+ */ | |
+void | |
+StartDatachecksumsWorkerLauncher(DataChecksumOperation op, | |
+ int cost_delay, int cost_limit) | |
+{ | |
+ BackgroundWorker bgw; | |
+ BackgroundWorkerHandle *bgw_handle; | |
+ | |
+ /* | |
+ * This can be hit during a short window during which the worker is | |
+ * shutting down. Once done the worker will clear the abort flag and | |
+ * re-processing can be performed. | |
+ */ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ if (DatachecksumsWorkerShmem->abort) | |
+ { | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ ereport(ERROR, | |
+ (errmsg("data checksums worker has been aborted"))); | |
+ } | |
+ | |
+ if (DatachecksumsWorkerShmem->launcher_started) | |
+ { | |
+ /* Failed to set means somebody else started */ | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ ereport(NOTICE, | |
+ (errmsg("data checksums worker is already running"))); | |
+ return; | |
+ } | |
+ | |
+ /* Whether to enable or disable checksums */ | |
+ DatachecksumsWorkerShmem->operation = op; | |
+ | |
+ /* Backoff parameters to throttle the load during enabling */ | |
+ DatachecksumsWorkerShmem->cost_delay = cost_delay; | |
+ DatachecksumsWorkerShmem->cost_limit = cost_limit; | |
+ | |
+ memset(&bgw, 0, sizeof(bgw)); | |
+ bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; | |
+ bgw.bgw_start_time = BgWorkerStart_RecoveryFinished; | |
+ snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres"); | |
+ snprintf(bgw.bgw_function_name, BGW_MAXLEN, "DatachecksumsWorkerLauncherMain"); | |
+ snprintf(bgw.bgw_name, BGW_MAXLEN, "datachecksumsworker launcher"); | |
+ snprintf(bgw.bgw_type, BGW_MAXLEN, "datachecksumsworker launcher"); | |
+ bgw.bgw_restart_time = BGW_NEVER_RESTART; | |
+ bgw.bgw_notify_pid = MyProcPid; | |
+ bgw.bgw_main_arg = (Datum) 0; | |
+ | |
+ DatachecksumsWorkerShmem->launcher_started = true; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ | |
+ if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle)) | |
+ { | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->launcher_started = false; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ ereport(ERROR, | |
+ (errmsg("failed to start background worker to process data checksums"))); | |
+ } | |
+} | |
+ | |
+/* | |
+ * ShutdownDatachecksumsWorkerIfRunning | |
+ * Request shutdown of the datachecksumworker | |
+ * | |
+ * This does not turn off processing immediately, it signals the checksum | |
+ * process to end when done with the current block. | |
+ */ | |
+void | |
+ShutdownDatachecksumsWorkerIfRunning(void) | |
+{ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ | |
+ /* If the launcher isn't started, there is nothing to shut down */ | |
+ if (DatachecksumsWorkerShmem->launcher_started) | |
+ DatachecksumsWorkerShmem->abort = true; | |
+ | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+} | |
+ | |
+/* | |
+ * ProcessSingleRelationFork | |
+ * Enable checksums in a single relation/fork. | |
+ * | |
+ * Returns true if successful, and false if *aborted*. On error, an actual | |
+ * error is raised in the lower levels. | |
+ */ | |
+static bool | |
+ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy) | |
+{ | |
+ BlockNumber numblocks = RelationGetNumberOfBlocksInFork(reln, forkNum); | |
+ BlockNumber b; | |
+ char activity[NAMEDATALEN * 2 + 128]; | |
+ | |
+ for (b = 0; b < numblocks; b++) | |
+ { | |
+ Buffer buf = ReadBufferExtended(reln, forkNum, b, RBM_NORMAL, strategy); | |
+ | |
+ /* | |
+ * Report to pgstat every 100 blocks to keep from overwhelming the | |
+ * activity reporting with close to identical reports. | |
+ */ | |
+ if ((b % 100) == 0) | |
+ { | |
+ snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s block %d/%d)", | |
+ get_namespace_name(RelationGetNamespace(reln)), RelationGetRelationName(reln), | |
+ forkNames[forkNum], b, numblocks); | |
+ pgstat_report_activity(STATE_RUNNING, activity); | |
+ } | |
+ | |
+ /* Need to get an exclusive lock before we can flag as dirty */ | |
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); | |
+ | |
+ /* | |
+ * Mark the buffer as dirty and force a full page write. We have to | |
+ * re-write the page to WAL even if the checksum hasn't changed, | |
+ * because if there is a replica it might have a slightly different | |
+ * version of the page with an invalid checksum, caused by unlogged | |
+ * changes (e.g. hintbits) on the master happening while checksums | |
+ * were off. This can happen if there was a valid checksum on the page | |
+ * at one point in the past, so only when checksums are first on, then | |
+ * off, and then turned on again. | |
+ */ | |
+ START_CRIT_SECTION(); | |
+ MarkBufferDirty(buf); | |
+ log_newpage_buffer(buf, false); | |
+ END_CRIT_SECTION(); | |
+ | |
+ UnlockReleaseBuffer(buf); | |
+ | |
+ /* | |
+ * This is the only place where we check if we are asked to abort, the | |
+ * abort will bubble up from here. It's safe to check this without | 
+ * a lock, because if we miss it being set, we will try again soon. | |
+ */ | |
+ if (DatachecksumsWorkerShmem->abort) | |
+ return false; | |
+ | |
+ vacuum_delay_point(); | |
+ } | |
+ | |
+ return true; | |
+} | |
+ | |
+/* | |
+ * ProcessSingleRelationByOid | |
+ * Process a single relation based on oid. | |
+ * | |
+ * Returns true if successful, and false if *aborted*. On error, an actual | |
+ * error is raised in the lower levels. | |
+ */ | |
+static bool | |
+ProcessSingleRelationByOid(Oid relationId, BufferAccessStrategy strategy) | |
+{ | |
+ Relation rel; | |
+ ForkNumber fnum; | |
+ bool aborted = false; | |
+ | |
+ StartTransactionCommand(); | |
+ | |
+ elog(DEBUG2, | |
+ "background worker \"datachecksumsworker\" starting to process relation %u", | |
+ relationId); | |
+ | |
+ rel = try_relation_open(relationId, AccessShareLock); | |
+ if (rel == NULL) | |
+ { | |
+ /* | |
+ * Relation no longer exists. We don't consider this an error since | 
+ * there are no pages in it that need checksums, and thus return true. | |
+ */ | |
+ elog(DEBUG1, | |
+ "background worker \"datachecksumsworker\" skipping relation %u as it no longer exists", | |
+ relationId); | |
+ CommitTransactionCommand(); | |
+ pgstat_report_activity(STATE_IDLE, NULL); | |
+ return true; | |
+ } | |
+ RelationOpenSmgr(rel); | |
+ | |
+ for (fnum = 0; fnum <= MAX_FORKNUM; fnum++) | |
+ { | |
+ if (smgrexists(rel->rd_smgr, fnum)) | |
+ { | |
+ if (!ProcessSingleRelationFork(rel, fnum, strategy)) | |
+ { | |
+ aborted = true; | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ relation_close(rel, AccessShareLock); | |
+ elog(DEBUG2, | |
+ "background worker \"datachecksumsworker\" done with relation %u: %s", | |
+ relationId, (aborted ? "aborted" : "finished")); | |
+ | |
+ if (!aborted) | |
+ SetRelHasChecksums(relationId); | |
+ | |
+ CommitTransactionCommand(); | |
+ | |
+ pgstat_report_activity(STATE_IDLE, NULL); | |
+ | |
+ return !aborted; | |
+} | |
+ | |
+static void | |
+SetRelHasChecksums(Oid relOid) | |
+{ | |
+ Relation rel; | |
+ Form_pg_class pg_class_tuple; | |
+ HeapTuple tuple; | |
+ | |
+ rel = table_open(RelationRelationId, RowExclusiveLock); | |
+ | |
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); | |
+ if (!HeapTupleIsValid(tuple)) | |
+ elog(ERROR, "cache lookup failed for relation %u", relOid); | |
+ | |
+ pg_class_tuple = (Form_pg_class) GETSTRUCT(tuple); | |
+ pg_class_tuple->relhaschecksums = true; | |
+ | |
+ CatalogTupleUpdate(rel, &tuple->t_self, tuple); | |
+ | |
+ ReleaseSysCache(tuple); | |
+ | |
+ table_close(rel, RowExclusiveLock); | |
+} | |
+ | |
+/* | |
+ * ProcessDatabase | |
+ * Enable checksums in a single database. | |
+ * | |
+ * We do this by launching a dynamic background worker into this database, and | |
+ * waiting for it to finish. We have to do this in a separate worker, since | |
+ * each process can only be connected to one database during its lifetime. | |
+ */ | |
+static DatachecksumsWorkerResult | |
+ProcessDatabase(DatachecksumsWorkerDatabase * db) | |
+{ | |
+ BackgroundWorker bgw; | |
+ BackgroundWorkerHandle *bgw_handle; | |
+ BgwHandleStatus status; | |
+ pid_t pid; | |
+ char activity[NAMEDATALEN + 64]; | |
+ | |
+ DatachecksumsWorkerShmem->success = DATACHECKSUMSWORKER_FAILED; | |
+ | |
+ memset(&bgw, 0, sizeof(bgw)); | |
+ bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; | |
+ bgw.bgw_start_time = BgWorkerStart_RecoveryFinished; | |
+ snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres"); | |
+ if (DatachecksumsWorkerShmem->operation == ENABLE_CHECKSUMS) | |
+ snprintf(bgw.bgw_function_name, BGW_MAXLEN, "DatachecksumsWorkerMain"); | |
+ else if (DatachecksumsWorkerShmem->operation == RESET_STATE) | |
+ snprintf(bgw.bgw_function_name, BGW_MAXLEN, "ResetDataChecksumStateInDatabase"); | |
+ else | |
+ elog(ERROR, "invalid datachecksumworker operation requested: %d", | |
+ DatachecksumsWorkerShmem->operation); | |
+ snprintf(bgw.bgw_name, BGW_MAXLEN, "datachecksumsworker worker"); | |
+ snprintf(bgw.bgw_type, BGW_MAXLEN, "datachecksumsworker worker"); | |
+ bgw.bgw_restart_time = BGW_NEVER_RESTART; | |
+ bgw.bgw_notify_pid = MyProcPid; | |
+ bgw.bgw_main_arg = ObjectIdGetDatum(db->dboid); | |
+ | |
+ /* | |
+ * If there are no worker slots available, make sure we retry processing | |
+ * this database. This will make the datachecksumsworker move on to the next | |
+ * database and quite likely fail with the same problem. Maybe we need a | |
+ * backoff to avoid running through all the databases here in short order. | |
+ */ | |
+ if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle)) | |
+ { | |
+ ereport(WARNING, | |
+ (errmsg("failed to start worker for enabling checksums in \"%s\", retrying", | |
+ db->dbname), | |
+ errhint("The max_worker_processes setting might be too low."))); | |
+ return DATACHECKSUMSWORKER_RETRYDB; | |
+ } | |
+ | |
+ status = WaitForBackgroundWorkerStartup(bgw_handle, &pid); | |
+ if (status == BGWH_STOPPED) | |
+ { | |
+ ereport(WARNING, | |
+ (errmsg("could not start background worker for enabling checksums in \"%s\"", | |
+ db->dbname), | |
+ errhint("More details on the error might be found in the server log."))); | |
+ return DATACHECKSUMSWORKER_FAILED; | |
+ } | |
+ | |
+ /* | |
+ * If the postmaster crashed we cannot end up with a processed database | |
+ * so we have no alternative other than exiting. When enabling checksums | |
+ * we won't at this time have changed the pg_control version to enabled | |
+ * so when the cluster comes back up processing will have to be resumed. | 
+ * When disabling, the pg_control version will be set to off before this | |
+ * so when the cluster comes up checksums will be off as expected. In the | |
+ * latter case we might have stale relhaschecksums flags in pg_class which | |
+ * need to be handled in some way. TODO | |
+ */ | |
+ if (status == BGWH_POSTMASTER_DIED) | |
+ ereport(FATAL, | |
+ (errmsg("cannot enable checksums without the postmaster process"), | |
+ errhint("Restart the database and restart the checksumming process by calling pg_enable_data_checksums()."))); | |
+ | |
+ Assert(status == BGWH_STARTED); | |
+ ereport(DEBUG1, | |
+ (errmsg("started background worker \"datachecksumsworker\" in database \"%s\"", | |
+ db->dbname))); | |
+ | |
+ snprintf(activity, sizeof(activity) - 1, | |
+ "Waiting for worker in database %s (pid %d)", db->dbname, pid); | |
+ pgstat_report_activity(STATE_RUNNING, activity); | |
+ | |
+ status = WaitForBackgroundWorkerShutdown(bgw_handle); | |
+ if (status == BGWH_POSTMASTER_DIED) | |
+ ereport(FATAL, | |
+ (errmsg("postmaster exited during checksum processing in \"%s\"", | |
+ db->dbname), | |
+ errhint("Restart the database and restart the checksumming process by calling pg_enable_data_checksums()."))); | |
+ | |
+ if (DatachecksumsWorkerShmem->success == DATACHECKSUMSWORKER_ABORTED) | |
+ ereport(LOG, | |
+ (errmsg("background worker for enabling checksums was aborted during processing in \"%s\"", | |
+ db->dbname))); | |
+ | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" in \"%s\" completed", | |
+ db->dbname))); | |
+ | |
+ pgstat_report_activity(STATE_IDLE, NULL); | |
+ | |
+ return DatachecksumsWorkerShmem->success; | |
+} | |
+ | |
+static void | |
+launcher_exit(int code, Datum arg) | |
+{ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->abort = false; | |
+ DatachecksumsWorkerShmem->launcher_started = false; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+} | |
+ | |
+static void | |
+launcher_cancel_handler(SIGNAL_ARGS) | |
+{ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->abort = true; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+} | |
+ | |
+static void | |
+WaitForAllTransactionsToFinish(void) | |
+{ | |
+ TransactionId waitforxid; | |
+ bool aborted = false; | |
+ | |
+ LWLockAcquire(XidGenLock, LW_SHARED); | |
+ waitforxid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid); | |
+ LWLockRelease(XidGenLock); | |
+ | |
+ while (!aborted) | |
+ { | |
+ TransactionId oldestxid = GetOldestActiveTransactionId(); | |
+ | |
+ if (TransactionIdPrecedes(oldestxid, waitforxid)) | |
+ { | |
+ char activity[64]; | |
+ int rc; | |
+ | |
+ /* Oldest running xid is older than us, so wait */ | |
+ snprintf(activity, | |
+ sizeof(activity), | |
+ "Waiting for current transactions to finish (waiting for %u)", | |
+ waitforxid); | |
+ pgstat_report_activity(STATE_RUNNING, activity); | |
+ | |
+ /* Retry every 5 seconds */ | |
+ ResetLatch(MyLatch); | |
+ rc = WaitLatch(MyLatch, | |
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, | |
+ 5000, | |
+ WAIT_EVENT_CHECKSUM_ENABLE_STARTCONDITION); | |
+ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ | |
+ /* | |
+ * If the postmaster died we won't be able to enable checksums | 
+ * clusterwide so abort and hope to continue when restarted. | |
+ */ | |
+ if (rc & WL_POSTMASTER_DEATH) | |
+ DatachecksumsWorkerShmem->abort = true; | |
+ aborted = DatachecksumsWorkerShmem->abort; | |
+ | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ } | |
+ else | |
+ { | |
+ pgstat_report_activity(STATE_IDLE, NULL); | |
+ return; | |
+ } | |
+ } | |
+} | |
+ | |
+void | |
+DatachecksumsWorkerLauncherMain(Datum arg) | |
+{ | |
+ bool connected = false; | |
+ | |
+ on_shmem_exit(launcher_exit, 0); | |
+ | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" launcher started"))); | |
+ | |
+ pqsignal(SIGTERM, die); | |
+ pqsignal(SIGINT, launcher_cancel_handler); | |
+ | |
+ BackgroundWorkerUnblockSignals(); | |
+ | |
+ MyBackendType = B_DATACHECKSUMSWORKER_LAUNCHER; | |
+ init_ps_display(NULL); | |
+ | |
+ if (DatachecksumsWorkerShmem->operation == RESET_STATE) | |
+ { | |
+ if (!ProcessAllDatabases(connected)) | |
+ { | |
+ /* | |
+ * Before we error out make sure we clear state since this may | |
+ * otherwise render the worker stuck without possibility of a | |
+ * restart. | |
+ */ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->launcher_started = false; | |
+ DatachecksumsWorkerShmem->abort = false; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ ereport(ERROR, | |
+ (errmsg("unable to finish processing"))); | |
+ } | |
+ | |
+ connected = true; | |
+ SetDataChecksumsOnInProgress(); | |
+ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->operation = ENABLE_CHECKSUMS; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ } | |
+ | |
+ /* | |
+ * Prepare for datachecksumsworker shutdown, once we signal that checksums | 
+ * are enabled we want the worker to be done and exited to avoid races | |
+ * with immediate disabling/enabling. | |
+ */ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ DatachecksumsWorkerShmem->abort = false; | |
+ DatachecksumsWorkerShmem->launcher_started = false; | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ | |
+ /* | |
+ * If processing succeeds for ENABLE_CHECKSUMS, then everything has been | |
+ * processed so set checksums as enabled clusterwide | |
+ */ | |
+ if (ProcessAllDatabases(connected)) | |
+ { | |
+ SetDataChecksumsOn(); | |
+ ereport(LOG, | |
+ (errmsg("checksums enabled clusterwide"))); | |
+ } | |
+} | |
+ | |
+static bool | |
+ProcessAllDatabases(bool already_connected) | |
+{ | |
+ List *DatabaseList; | |
+ HTAB *ProcessedDatabases = NULL; | |
+ ListCell *lc; | |
+ HASHCTL hash_ctl; | |
+ bool found_failed = false; | |
+ | |
+ /* Initialize a hash tracking all processed databases */ | |
+ memset(&hash_ctl, 0, sizeof(hash_ctl)); | |
+ hash_ctl.keysize = sizeof(Oid); | |
+ hash_ctl.entrysize = sizeof(DatachecksumsWorkerResultEntry); | |
+ ProcessedDatabases = hash_create("Processed databases", | |
+ 64, | |
+ &hash_ctl, | |
+ HASH_ELEM | HASH_BLOBS); | |
+ | |
+ /* | |
+ * Initialize a connection to shared catalogs only. | |
+ */ | |
+ if (!already_connected) | |
+ BackgroundWorkerInitializeConnection(NULL, NULL, 0); | |
+ | |
+ /* | |
+ * Set up so first run processes shared catalogs, but not once in every | |
+ * db. | |
+ */ | |
+ DatachecksumsWorkerShmem->process_shared_catalogs = true; | |
+ | |
+ while (true) | |
+ { | |
+ int processed_databases = 0; | |
+ | |
+ /* | |
+ * Get a list of all databases to process. This may include databases | |
+ * that were created during our runtime. | |
+ * | |
+ * Since a database can be created as a copy of any other database | |
+ * (which may not have existed in our last run), we have to repeat | |
+ * this loop until no new databases show up in the list. Since we wait | |
+ * for all pre-existing transactions to finish, this way we can be | 
+ * certain that there are no databases left without checksums. | |
+ */ | |
+ DatabaseList = BuildDatabaseList(); | |
+ | |
+ foreach(lc, DatabaseList) | |
+ { | |
+ DatachecksumsWorkerDatabase *db = (DatachecksumsWorkerDatabase *) lfirst(lc); | |
+ DatachecksumsWorkerResult result; | |
+ DatachecksumsWorkerResultEntry *entry; | |
+ bool found; | |
+ | |
+ elog(DEBUG1, "Starting processing of database %s with oid %u", db->dbname, db->dboid); | |
+ | |
+ entry = (DatachecksumsWorkerResultEntry *) hash_search(ProcessedDatabases, &db->dboid, | |
+ HASH_FIND, NULL); | |
+ | |
+ if (entry) | |
+ { | |
+ if (entry->result == DATACHECKSUMSWORKER_RETRYDB) | |
+ { | |
+ /* | |
+ * Limit the number of retries to avoid infinite looping | |
+ * in case there simply wont be enough workers in the | |
+ * cluster to finish this operation. | |
+ */ | |
+ if (entry->retries > DATACHECKSUMSWORKER_MAX_DB_RETRIES) | |
+ entry->result = DATACHECKSUMSWORKER_FAILED; | |
+ } | |
+ | |
+ /* Skip if this database has been processed already */ | |
+ if (entry->result != DATACHECKSUMSWORKER_RETRYDB) | |
+ { | |
+ pfree(db->dbname); | |
+ pfree(db); | |
+ continue; | |
+ } | |
+ } | |
+ | |
+ result = ProcessDatabase(db); | |
+ processed_databases++; | |
+ | |
+ if (result == DATACHECKSUMSWORKER_SUCCESSFUL) | |
+ { | |
+ /* | |
+ * If one database has completed shared catalogs, we don't | |
+ * have to process them again. | |
+ */ | |
+ if (DatachecksumsWorkerShmem->process_shared_catalogs) | |
+ DatachecksumsWorkerShmem->process_shared_catalogs = false; | |
+ } | |
+ else if (result == DATACHECKSUMSWORKER_ABORTED) | |
+ /* Abort flag set, so exit the whole process */ | |
+ return false; | |
+ | |
+ entry = hash_search(ProcessedDatabases, &db->dboid, HASH_ENTER, &found); | |
+ entry->dboid = db->dboid; | |
+ entry->result = result; | |
+ if (!found) | |
+ entry->retries = 0; | |
+ else | |
+ entry->retries++; | |
+ | |
+ pfree(db->dbname); | |
+ pfree(db); | |
+ } | |
+ | |
+ elog(DEBUG1, | |
+ "completed one pass over all databases for checksum enabling, %i databases processed", | |
+ processed_databases); | |
+ | |
+ list_free(DatabaseList); | |
+ | |
+ /* | |
+ * If no databases were processed in this run of the loop, we have now | |
+ * finished all databases and no concurrently created ones can exist. | |
+ */ | |
+ if (processed_databases == 0) | |
+ break; | |
+ } | |
+ | |
+ /* | |
+ * ProcessedDatabases now has all databases and the results of their | |
+ * processing. Failure to enable checksums for a database can be because | |
+ * they actually failed for some reason, or because the database was | |
+ * dropped between us getting the database list and trying to process it. | |
+ * Get a fresh list of databases to detect the second case where the | |
+ * database was dropped before we had started processing it. If a database | |
+ * still exists, but enabling checksums failed then we fail the entire | |
+ * checksumming process and exit with an error. | |
+ */ | |
+ DatabaseList = BuildDatabaseList(); | |
+ | |
+ foreach(lc, DatabaseList) | |
+ { | |
+ DatachecksumsWorkerDatabase *db = (DatachecksumsWorkerDatabase *) lfirst(lc); | |
+ DatachecksumsWorkerResult *entry; | |
+ bool found; | |
+ | |
+ entry = hash_search(ProcessedDatabases, (void *) &db->dboid, | |
+ HASH_FIND, &found); | |
+ | |
+ /* | |
+ * We are only interested in the databases where the failed database | |
+ * still exists. | 
+ */ | |
+ if (found && *entry == DATACHECKSUMSWORKER_FAILED) | |
+ { | |
+ ereport(WARNING, | |
+ (errmsg("failed to enable checksums in \"%s\"", | |
+ db->dbname))); | |
+ found_failed = found; | |
+ continue; | |
+ } | |
+ } | |
+ | |
+ if (found_failed) | |
+ { | |
+ /* Disable checksums on cluster, because we failed */ | |
+ SetDataChecksumsOff(); | |
+ ereport(ERROR, | |
+ (errmsg("checksums failed to get enabled in all databases, aborting"), | |
+ errhint("The server log might have more information on the error."))); | |
+ } | |
+ | |
+ /* | |
+ * Force a checkpoint to get everything out to disk. TODO: we probably | |
+ * don't want to use a CHECKPOINT_IMMEDIATE here but it's very convenient | |
+ * for testing until the patch is fully baked, as it may otherwise make | |
+ * tests take a lot longer. | |
+ */ | |
+ RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_IMMEDIATE); | |
+ | |
+ return true; | |
+} | |
+ | |
+/* | |
+ * DatachecksumsWorkerShmemSize | |
+ * Compute required space for datachecksumsworker-related shared memory | |
+ */ | |
+Size | |
+DatachecksumsWorkerShmemSize(void) | |
+{ | |
+ Size size; | |
+ | |
+ size = sizeof(DatachecksumsWorkerShmemStruct); | |
+ size = MAXALIGN(size); | |
+ | |
+ return size; | |
+} | |
+ | |
+/* | |
+ * DatachecksumsWorkerShmemInit | |
+ * Allocate and initialize datachecksumsworker-related shared memory | |
+ */ | |
+void | |
+DatachecksumsWorkerShmemInit(void) | |
+{ | |
+ bool found; | |
+ | |
+ DatachecksumsWorkerShmem = (DatachecksumsWorkerShmemStruct *) | |
+ ShmemInitStruct("DatachecksumsWorker Data", | |
+ DatachecksumsWorkerShmemSize(), | |
+ &found); | |
+ | |
+ if (!found) | |
+ { | |
+ MemSet(DatachecksumsWorkerShmem, 0, DatachecksumsWorkerShmemSize()); | |
+ | |
+ /* | |
+ * Even if this is a redundant assignment, we want to be explicit about | |
+ * our intent for readability, since we want to be able to query this | |
+ * state in case of restartability. | |
+ */ | |
+ DatachecksumsWorkerShmem->launcher_started = false; | |
+ } | |
+} | |
+ | |
+/* | |
+ * BuildDatabaseList | |
+ * Compile a list of all currently available databases in the cluster | |
+ * | |
+ * This creates the list of databases for the datachecksumsworker workers to add | |
+ * checksums to. | |
+ */ | |
+static List * | |
+BuildDatabaseList(void) | |
+{ | |
+ List *DatabaseList = NIL; | |
+ Relation rel; | |
+ TableScanDesc scan; | |
+ HeapTuple tup; | |
+ MemoryContext ctx = CurrentMemoryContext; | |
+ MemoryContext oldctx; | |
+ | |
+ StartTransactionCommand(); | |
+ | |
+ rel = table_open(DatabaseRelationId, AccessShareLock); | |
+ | |
+ /* | |
+ * Before we do this, wait for all pending transactions to finish. This | |
+ * will ensure there are no concurrently running CREATE DATABASE, which | |
+ * could cause us to miss the creation of a database that was copied | |
+ * without checksums. | |
+ */ | |
+ WaitForAllTransactionsToFinish(); | |
+ | |
+ scan = table_beginscan_catalog(rel, 0, NULL); | |
+ | |
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) | |
+ { | |
+ Form_pg_database pgdb = (Form_pg_database) GETSTRUCT(tup); | |
+ DatachecksumsWorkerDatabase *db; | |
+ | |
+ oldctx = MemoryContextSwitchTo(ctx); | |
+ | |
+ db = (DatachecksumsWorkerDatabase *) palloc(sizeof(DatachecksumsWorkerDatabase)); | |
+ | |
+ db->dboid = pgdb->oid; | |
+ db->dbname = pstrdup(NameStr(pgdb->datname)); | |
+ | |
+ DatabaseList = lappend(DatabaseList, db); | |
+ | |
+ MemoryContextSwitchTo(oldctx); | |
+ } | |
+ | |
+ table_endscan(scan); | |
+ table_close(rel, AccessShareLock); | |
+ | |
+ CommitTransactionCommand(); | |
+ | |
+ return DatabaseList; | |
+} | |
+ | |
+/* | |
+ * BuildRelationList | |
+ * Compile a list of all relations in the database | |
+ * | |
+ * If include_shared is true, both shared relations and local ones are returned, | |
+ * all non-shared relations are returned. Temp tables are not included. | |
+ */ | |
+static List * | |
+BuildRelationList(bool include_shared) | |
+{ | |
+ List *RelationList = NIL; | |
+ Relation rel; | |
+ TableScanDesc scan; | |
+ HeapTuple tup; | |
+ MemoryContext ctx = CurrentMemoryContext; | |
+ MemoryContext oldctx; | |
+ | |
+ StartTransactionCommand(); | |
+ | |
+ rel = table_open(RelationRelationId, AccessShareLock); | |
+ scan = table_beginscan_catalog(rel, 0, NULL); | |
+ | |
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) | |
+ { | |
+ Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup); | |
+ DatachecksumsWorkerRelation *relentry; | |
+ | |
+ if (!RELKIND_HAS_STORAGE(pgc->relkind) || | |
+ pgc->relpersistence == RELPERSISTENCE_TEMP) | |
+ continue; | |
+ | |
+ if (pgc->relhaschecksums) | |
+ continue; | |
+ | |
+ if (pgc->relisshared && !include_shared) | |
+ continue; | |
+ | |
+ oldctx = MemoryContextSwitchTo(ctx); | |
+ relentry = (DatachecksumsWorkerRelation *) palloc(sizeof(DatachecksumsWorkerRelation)); | |
+ | |
+ relentry->reloid = pgc->oid; | |
+ relentry->relkind = pgc->relkind; | |
+ | |
+ RelationList = lappend(RelationList, relentry); | |
+ | |
+ MemoryContextSwitchTo(oldctx); | |
+ } | |
+ | |
+ table_endscan(scan); | |
+ table_close(rel, AccessShareLock); | |
+ | |
+ CommitTransactionCommand(); | |
+ | |
+ return RelationList; | |
+} | |
+ | |
+/* | |
+ * BuildTempTableList | |
+ * Compile a list of all temporary tables in database | |
+ * | |
+ * Returns a List of oids. | |
+ */ | |
+static List * | |
+BuildTempTableList(void) | |
+{ | |
+ List *RelationList = NIL; | |
+ Relation rel; | |
+ TableScanDesc scan; | |
+ HeapTuple tup; | |
+ MemoryContext ctx = CurrentMemoryContext; | |
+ MemoryContext oldctx; | |
+ | |
+ StartTransactionCommand(); | |
+ | |
+ rel = table_open(RelationRelationId, AccessShareLock); | |
+ scan = table_beginscan_catalog(rel, 0, NULL); | |
+ | |
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) | |
+ { | |
+ Form_pg_class pgc = (Form_pg_class) GETSTRUCT(tup); | |
+ | |
+ if (pgc->relpersistence != RELPERSISTENCE_TEMP) | |
+ continue; | |
+ | |
+ oldctx = MemoryContextSwitchTo(ctx); | |
+ RelationList = lappend_oid(RelationList, pgc->oid); | |
+ MemoryContextSwitchTo(oldctx); | |
+ } | |
+ | |
+ table_endscan(scan); | |
+ table_close(rel, AccessShareLock); | |
+ | |
+ CommitTransactionCommand(); | |
+ | |
+ return RelationList; | |
+} | |
+ | |
+void | |
+ResetDataChecksumStateInDatabase(Datum arg) | |
+{ | |
+ Relation rel; | |
+ HeapTuple tuple; | |
+ Oid dboid = DatumGetObjectId(arg); | |
+ TableScanDesc scan; | |
+ Form_pg_class pgc; | |
+ | |
+ pqsignal(SIGTERM, die); | |
+ | |
+ BackgroundWorkerUnblockSignals(); | |
+ | |
+ MyBackendType = B_DATACHECKSUMSWORKER_WORKER; | |
+ init_ps_display(NULL); | |
+ | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" starting for database oid %d to reset state", | |
+ dboid))); | |
+ | |
+ BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid, BGWORKER_BYPASS_ALLOWCONN); | |
+ | |
+ StartTransactionCommand(); | |
+ | |
+ rel = table_open(RelationRelationId, RowExclusiveLock); | |
+ scan = table_beginscan_catalog(rel, 0, NULL); | |
+ | |
+ while (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection))) | |
+ { | |
+ tuple = heap_copytuple(tuple); | |
+ pgc = (Form_pg_class) GETSTRUCT(tuple); | |
+ | |
+ if (pgc->relhaschecksums) | |
+ { | |
+ pgc->relhaschecksums = false; | |
+ CatalogTupleUpdate(rel, &tuple->t_self, tuple); | |
+ } | |
+ | |
+ heap_freetuple(tuple); | |
+ } | |
+ | |
+ table_endscan(scan); | |
+ table_close(rel, RowExclusiveLock); | |
+ | |
+ CommitTransactionCommand(); | |
+ | |
+ DatachecksumsWorkerShmem->success = DATACHECKSUMSWORKER_SUCCESSFUL; | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" completed resetting state in database oid %d", | |
+ dboid))); | |
+} | |
+ | |
+/* | |
+ * Main function for enabling checksums in a single database | |
+ */ | |
+void | |
+DatachecksumsWorkerMain(Datum arg) | |
+{ | |
+ Oid dboid = DatumGetObjectId(arg); | |
+ List *RelationList = NIL; | |
+ List *InitialTempTableList = NIL; | |
+ ListCell *lc; | |
+ BufferAccessStrategy strategy; | |
+ bool aborted = false; | |
+ | |
+ pqsignal(SIGTERM, die); | |
+ | |
+ BackgroundWorkerUnblockSignals(); | |
+ | |
+ MyBackendType = B_DATACHECKSUMSWORKER_WORKER; | |
+ init_ps_display(NULL); | |
+ | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" starting for database oid %d", | |
+ dboid))); | |
+ | |
+ BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid, BGWORKER_BYPASS_ALLOWCONN); | |
+ | |
+ /* | |
+ * Get a list of all temp tables present as we start in this database. We | |
+ * need to wait until they are all gone before we are done, since we cannot | |
+ * access those files and modify them. | |
+ */ | |
+ InitialTempTableList = BuildTempTableList(); | |
+ | |
+ /* | |
+ * Enable vacuum cost delay, if any. | |
+ */ | |
+ VacuumCostDelay = DatachecksumsWorkerShmem->cost_delay; | |
+ VacuumCostLimit = DatachecksumsWorkerShmem->cost_limit; | |
+ VacuumCostActive = (VacuumCostDelay > 0); | |
+ VacuumCostBalance = 0; | |
+ VacuumPageHit = 0; | |
+ VacuumPageMiss = 0; | |
+ VacuumPageDirty = 0; | |
+ | |
+ /* | |
+ * Create and set the vacuum strategy as our buffer strategy. | |
+ */ | |
+ strategy = GetAccessStrategy(BAS_VACUUM); | |
+ | |
+ RelationList = BuildRelationList(DatachecksumsWorkerShmem->process_shared_catalogs); | |
+ foreach(lc, RelationList) | |
+ { | |
+ DatachecksumsWorkerRelation *rel = (DatachecksumsWorkerRelation *) lfirst(lc); | |
+ | |
+ if (!ProcessSingleRelationByOid(rel->reloid, strategy)) | |
+ { | |
+ aborted = true; | |
+ break; | |
+ } | |
+ } | |
+ list_free_deep(RelationList); | |
+ | |
+ if (aborted) | |
+ { | |
+ DatachecksumsWorkerShmem->success = DATACHECKSUMSWORKER_ABORTED; | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" aborted in database oid %d", | |
+ dboid))); | |
+ return; | |
+ } | |
+ | |
+ /* | |
+ * Wait for all temp tables that existed when we started to go away. This | |
+ * is necessary since we cannot "reach" them to enable checksums. Any temp | |
+ * tables created after we started will already have checksums in them | |
+ * (due to the inprogress state), so no need to wait for those. | |
+ */ | |
+ while (!aborted) | |
+ { | |
+ List *CurrentTempTables; | |
+ ListCell *lc; | |
+ int numleft; | |
+ char activity[64]; | |
+ int rc; | |
+ | |
+ CurrentTempTables = BuildTempTableList(); | |
+ numleft = 0; | |
+ foreach(lc, InitialTempTableList) | |
+ { | |
+ if (list_member_oid(CurrentTempTables, lfirst_oid(lc))) | |
+ numleft++; | |
+ } | |
+ list_free(CurrentTempTables); | |
+ | |
+ if (numleft == 0) | |
+ break; | |
+ | |
+ /* At least one temp table left to wait for */ | |
+ snprintf(activity, sizeof(activity), "Waiting for %d temp tables to be removed", numleft); | |
+ pgstat_report_activity(STATE_RUNNING, activity); | |
+ | |
+ /* Retry every 5 seconds */ | |
+ ResetLatch(MyLatch); | |
+ rc = WaitLatch(MyLatch, | |
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, | |
+ 5000, | |
+ WAIT_EVENT_CHECKSUM_ENABLE_FINISHCONDITION); | |
+ | |
+ LWLockAcquire(DatachecksumsWorkerLock, LW_EXCLUSIVE); | |
+ | |
+ /* | |
+ * If the postmaster died we won't be able to enable checksums | |
+ * clusterwide so abort and hope to continue when restarted. | |
+ */ | |
+ if (rc & WL_POSTMASTER_DEATH) | |
+ DatachecksumsWorkerShmem->abort = true; | |
+ aborted = DatachecksumsWorkerShmem->abort; | |
+ | |
+ LWLockRelease(DatachecksumsWorkerLock); | |
+ } | |
+ | |
+ list_free(InitialTempTableList); | |
+ | |
+ DatachecksumsWorkerShmem->success = DATACHECKSUMSWORKER_SUCCESSFUL; | |
+ ereport(DEBUG1, | |
+ (errmsg("background worker \"datachecksumsworker\" completed in database oid %d", | |
+ dboid))); | |
+} | |
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c | |
index c022597bc0..4b211e8298 100644 | |
--- a/src/backend/postmaster/pgstat.c | |
+++ b/src/backend/postmaster/pgstat.c | |
@@ -3770,6 +3770,12 @@ pgstat_get_wait_ipc(WaitEventIPC w) | |
case WAIT_EVENT_CHECKPOINT_START: | |
event_name = "CheckpointStart"; | |
break; | |
+ case WAIT_EVENT_CHECKSUM_ENABLE_STARTCONDITION: | |
+ event_name = "ChecksumEnableStartCondition"; | |
+ break; | |
+ case WAIT_EVENT_CHECKSUM_ENABLE_FINISHCONDITION: | |
+ event_name = "ChecksumEnableFinishCondition"; | |
+ break; | |
case WAIT_EVENT_EXECUTE_GATHER: | |
event_name = "ExecuteGather"; | |
break; | |
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c | |
index 096b0fcef0..5f81bbb78d 100644 | |
--- a/src/backend/replication/basebackup.c | |
+++ b/src/backend/replication/basebackup.c | |
@@ -1595,7 +1595,7 @@ sendFile(const char *readfilename, const char *tarfilename, | |
_tarWriteHeader(tarfilename, NULL, statbuf, false); | |
- if (!noverify_checksums && DataChecksumsEnabled()) | |
+ if (!noverify_checksums && DataChecksumsNeedVerify()) | |
{ | |
char *filename; | |
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c | |
index c2e5e3abf8..1e0226166f 100644 | |
--- a/src/backend/replication/logical/decode.c | |
+++ b/src/backend/replication/logical/decode.c | |
@@ -196,6 +196,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) | |
case XLOG_FPW_CHANGE: | |
case XLOG_FPI_FOR_HINT: | |
case XLOG_FPI: | |
+ case XLOG_CHECKSUMS: | |
break; | |
default: | |
elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); | |
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c | |
index 427b0d59cd..ddfcbdd61a 100644 | |
--- a/src/backend/storage/ipc/ipci.c | |
+++ b/src/backend/storage/ipc/ipci.c | |
@@ -27,6 +27,7 @@ | |
#include "postmaster/autovacuum.h" | |
#include "postmaster/bgworker_internals.h" | |
#include "postmaster/bgwriter.h" | |
+#include "postmaster/datachecksumsworker.h" | |
#include "postmaster/postmaster.h" | |
#include "replication/logicallauncher.h" | |
#include "replication/origin.h" | |
@@ -255,6 +256,7 @@ CreateSharedMemoryAndSemaphores(void) | |
WalSndShmemInit(); | |
WalRcvShmemInit(); | |
ApplyLauncherShmemInit(); | |
+ DatachecksumsWorkerShmemInit(); | |
/* | |
* Set up other modules that need some shared memory space | |
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c | |
index 4fa385b0ec..91340192c5 100644 | |
--- a/src/backend/storage/ipc/procsignal.c | |
+++ b/src/backend/storage/ipc/procsignal.c | |
@@ -18,6 +18,7 @@ | |
#include <unistd.h> | |
#include "access/parallel.h" | |
+#include "access/xlog.h" | |
#include "commands/async.h" | |
#include "miscadmin.h" | |
#include "pgstat.h" | |
@@ -92,7 +93,10 @@ static volatile ProcSignalSlot *MyProcSignalSlot = NULL; | |
static bool CheckProcSignal(ProcSignalReason reason); | |
static void CleanupProcSignalState(int status, Datum arg); | |
-static void ProcessBarrierPlaceholder(void); | |
+ | |
+static void ProcessBarrierChecksumOnInProgress(void); | |
+static void ProcessBarrierChecksumOn(void); | |
+static void ProcessBarrierChecksumOff(void); | |
/* | |
* ProcSignalShmemSize | |
@@ -495,8 +499,18 @@ ProcessProcSignalBarrier(void) | |
* unconditionally, but it's more efficient to call only the ones that | |
* might need us to do something based on the flags. | |
*/ | |
- if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER)) | |
- ProcessBarrierPlaceholder(); | |
+ if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON)) | |
+ { | |
+ ProcessBarrierChecksumOnInProgress(); | |
+ } | |
+ else if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_CHECKSUM_ON)) | |
+ { | |
+ ProcessBarrierChecksumOn(); | |
+ } | |
+ else if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_CHECKSUM_OFF)) | |
+ { | |
+ ProcessBarrierChecksumOff(); | |
+ } | |
/* | |
* State changes related to all types of barriers that might have been | |
@@ -509,16 +523,21 @@ ProcessProcSignalBarrier(void) | |
} | |
static void | |
-ProcessBarrierPlaceholder(void) | |
+ProcessBarrierChecksumOn(void) | |
{ | |
- /* | |
- * XXX. This is just a placeholder until the first real user of this | |
- * machinery gets committed. Rename PROCSIGNAL_BARRIER_PLACEHOLDER to | |
- * PROCSIGNAL_BARRIER_SOMETHING_ELSE where SOMETHING_ELSE is something | |
- * appropriately descriptive. Get rid of this function and instead have | |
- * ProcessBarrierSomethingElse. Most likely, that function should live in | |
- * the file pertaining to that subsystem, rather than here. | |
- */ | |
+ AbsorbChecksumsOnBarrier(); | |
+} | |
+ | |
+static void | |
+ProcessBarrierChecksumOff(void) | |
+{ | |
+ AbsorbChecksumsOffBarrier(); | |
+} | |
+ | |
+static void | |
+ProcessBarrierChecksumOnInProgress(void) | |
+{ | |
+ AbsorbChecksumsOnInProgressBarrier(); | |
} | |
/* | |
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt | |
index e6985e8eed..42f1b23aec 100644 | |
--- a/src/backend/storage/lmgr/lwlocknames.txt | |
+++ b/src/backend/storage/lmgr/lwlocknames.txt | |
@@ -50,3 +50,4 @@ MultiXactTruncationLock 41 | |
OldSnapshotTimeMapLock 42 | |
LogicalRepWorkerLock 43 | |
XactTruncationLock 44 | |
+DatachecksumsWorkerLock 45 | |
diff --git a/src/backend/storage/page/README b/src/backend/storage/page/README | |
index 4e45bd92ab..3ab61abd0a 100644 | |
--- a/src/backend/storage/page/README | |
+++ b/src/backend/storage/page/README | |
@@ -10,7 +10,9 @@ http://www.cs.toronto.edu/~bianca/papers/sigmetrics09.pdf, discussed | |
2010/12/22 on -hackers list. | |
Current implementation requires this be enabled system-wide at initdb time, or | |
-by using the pg_checksums tool on an offline cluster. | |
+by using the pg_checksums tool on an offline cluster. Checksums can also be | |
+turned on and off using pg_enable_data_checksums()/pg_disable_data_checksums() | |
+at runtime. | |
The checksum is not valid at all times on a data page!! | |
The checksum is valid when the page leaves the shared pool and is checked | |
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c | |
index d708117a40..4c6deaae8b 100644 | |
--- a/src/backend/storage/page/bufpage.c | |
+++ b/src/backend/storage/page/bufpage.c | |
@@ -94,7 +94,7 @@ PageIsVerified(Page page, BlockNumber blkno) | |
*/ | |
if (!PageIsNew(page)) | |
{ | |
- if (DataChecksumsEnabled()) | |
+ if (DataChecksumsNeedVerify()) | |
{ | |
checksum = pg_checksum_page((char *) page, blkno); | |
@@ -1167,7 +1167,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) | |
static char *pageCopy = NULL; | |
/* If we don't need a checksum, just return the passed-in data */ | |
- if (PageIsNew(page) || !DataChecksumsEnabled()) | |
+ if (PageIsNew(page) || !DataChecksumsNeedWrite()) | |
return (char *) page; | |
/* | |
@@ -1194,7 +1194,7 @@ void | |
PageSetChecksumInplace(Page page, BlockNumber blkno) | |
{ | |
/* If we don't need a checksum, just return */ | |
- if (PageIsNew(page) || !DataChecksumsEnabled()) | |
+ if (PageIsNew(page) || !DataChecksumsNeedWrite()) | |
return; | |
((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno); | |
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c | |
index 2aff739466..6f04af6c4c 100644 | |
--- a/src/backend/utils/adt/pgstatfuncs.c | |
+++ b/src/backend/utils/adt/pgstatfuncs.c | |
@@ -1559,7 +1559,7 @@ pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS) | |
int64 result; | |
PgStat_StatDBEntry *dbentry; | |
- if (!DataChecksumsEnabled()) | |
+ if (!DataChecksumsNeedWrite()) | |
PG_RETURN_NULL(); | |
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) | |
@@ -1577,7 +1577,7 @@ pg_stat_get_db_checksum_last_failure(PG_FUNCTION_ARGS) | |
TimestampTz result; | |
PgStat_StatDBEntry *dbentry; | |
- if (!DataChecksumsEnabled()) | |
+ if (!DataChecksumsNeedWrite()) | |
PG_RETURN_NULL(); | |
if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) | |
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c | |
index 0b9eb00d2d..aceebf58ad 100644 | |
--- a/src/backend/utils/cache/relcache.c | |
+++ b/src/backend/utils/cache/relcache.c | |
@@ -1875,6 +1875,8 @@ formrdesc(const char *relationName, Oid relationReltype, | |
relation->rd_rel->relnatts = (int16) natts; | |
relation->rd_rel->relam = HEAP_TABLE_AM_OID; | |
+ relation->rd_rel->relhaschecksums = DataChecksumsNeedWrite(); | |
+ | |
/* | |
* initialize attribute tuple form | |
* | |
@@ -3483,6 +3485,8 @@ RelationBuildLocalRelation(const char *relname, | |
else | |
rel->rd_rel->relispopulated = true; | |
+ rel->rd_rel->relhaschecksums = DataChecksumsNeedWrite(); | |
+ | |
/* set replica identity -- system catalogs and non-tables don't have one */ | |
if (!IsCatalogNamespace(relnamespace) && | |
(relkind == RELKIND_RELATION || | |
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c | |
index cca9704d2d..09d36c507b 100644 | |
--- a/src/backend/utils/init/miscinit.c | |
+++ b/src/backend/utils/init/miscinit.c | |
@@ -247,6 +247,12 @@ GetBackendTypeDesc(BackendType backendType) | |
case B_LOGGER: | |
backendDesc = "logger"; | |
break; | |
+ case B_DATACHECKSUMSWORKER_LAUNCHER: | |
+ backendDesc = "datachecksumsworker launcher"; | |
+ break; | |
+ case B_DATACHECKSUMSWORKER_WORKER: | |
+ backendDesc = "datachecksumsworker worker"; | |
+ break; | |
} | |
return backendDesc; | |
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c | |
index f4247ea70d..06443b08b1 100644 | |
--- a/src/backend/utils/init/postinit.c | |
+++ b/src/backend/utils/init/postinit.c | |
@@ -617,6 +617,11 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, | |
if (MyBackendId > MaxBackends || MyBackendId <= 0) | |
elog(FATAL, "bad backend ID: %d", MyBackendId); | |
+ /* | |
+ * Set up local cache of Controldata values. | |
+ */ | |
+ InitLocalControldata(); | |
+ | |
/* Now that we have a BackendId, we can participate in ProcSignal */ | |
ProcSignalInit(MyBackendId); | |
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c | |
index 75fc6f11d6..f9adfc3bc2 100644 | |
--- a/src/backend/utils/misc/guc.c | |
+++ b/src/backend/utils/misc/guc.c | |
@@ -33,6 +33,7 @@ | |
#include "access/transam.h" | |
#include "access/twophase.h" | |
#include "access/xact.h" | |
+#include "access/xlog.h" | |
#include "access/xlog_internal.h" | |
#include "catalog/namespace.h" | |
#include "catalog/pg_authid.h" | |
@@ -73,6 +74,7 @@ | |
#include "replication/walreceiver.h" | |
#include "replication/walsender.h" | |
#include "storage/bufmgr.h" | |
+#include "storage/checksum.h" | |
#include "storage/dsm_impl.h" | |
#include "storage/fd.h" | |
#include "storage/large_object.h" | |
@@ -494,6 +496,16 @@ static struct config_enum_entry shared_memory_options[] = { | |
{NULL, 0, false} | |
}; | |
+/* | |
+ * Options for data_checksums enum. | |
+ */ | |
+static const struct config_enum_entry data_checksum_options[] = { | |
+ {"on", DATA_CHECKSUMS_ON, true}, | |
+ {"off", DATA_CHECKSUMS_OFF, true}, | |
+ {"inprogress", DATA_CHECKSUMS_INPROGRESS_ON, true}, | |
+ {NULL, 0, false} | |
+}; | |
+ | |
/* | |
* Options for enum values stored in other modules | |
*/ | |
@@ -602,7 +614,7 @@ static int max_identifier_length; | |
static int block_size; | |
static int segment_size; | |
static int wal_block_size; | |
-static bool data_checksums; | |
+static int data_checksums_tmp; | |
static bool integer_datetimes; | |
static bool assert_enabled; | |
static char *recovery_target_timeline_string; | |
@@ -1903,17 +1915,6 @@ static struct config_bool ConfigureNamesBool[] = | |
NULL, NULL, NULL | |
}, | |
- { | |
- {"data_checksums", PGC_INTERNAL, PRESET_OPTIONS, | |
- gettext_noop("Shows whether data checksums are turned on for this cluster."), | |
- NULL, | |
- GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | |
- }, | |
- &data_checksums, | |
- false, | |
- NULL, NULL, NULL | |
- }, | |
- | |
{ | |
{"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE, | |
gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."), | |
@@ -4755,6 +4756,17 @@ static struct config_enum ConfigureNamesEnum[] = | |
NULL, NULL, NULL | |
}, | |
+ { | |
+ {"data_checksums", PGC_INTERNAL, PRESET_OPTIONS, | |
+ gettext_noop("Shows whether data checksums are turned on for this cluster."), | |
+ NULL, | |
+ GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE | |
+ }, | |
+ &data_checksums_tmp, | |
+ DATA_CHECKSUMS_OFF, data_checksum_options, | |
+ NULL, NULL, show_data_checksums | |
+ }, | |
+ | |
/* End-of-list marker */ | |
{ | |
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL | |
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c | |
index 1daa5aed0e..f5468d0cd9 100644 | |
--- a/src/bin/pg_checksums/pg_checksums.c | |
+++ b/src/bin/pg_checksums/pg_checksums.c | |
@@ -597,7 +597,7 @@ main(int argc, char *argv[]) | |
exit(1); | |
} | |
- if (ControlFile->data_checksum_version > 0 && | |
+ if (ControlFile->data_checksum_version == DATA_CHECKSUMS_ON && | |
mode == PG_MODE_ENABLE) | |
{ | |
pg_log_error("data checksums are already enabled in cluster"); | |
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c | |
index 00d71e3a8a..4bbf7b36a5 100644 | |
--- a/src/bin/pg_upgrade/controldata.c | |
+++ b/src/bin/pg_upgrade/controldata.c | |
@@ -657,6 +657,15 @@ check_control_data(ControlData *oldctrl, | |
* check_for_isn_and_int8_passing_mismatch(). | |
*/ | |
+ /* | |
+ * If checksums have been turned on in the old cluster, but the | |
+ * datachecksumsworker have yet to finish, then disallow upgrading. The user | |
+ * should either let the process finish, or turn off checksums, before | |
+ * retrying. | |
+ */ | |
+ if (oldctrl->data_checksum_version == 2) | |
+ pg_fatal("checksum enabling in old cluster is in progress\n"); | |
+ | |
/* | |
* We might eventually allow upgrades from checksum to no-checksum | |
* clusters. | |
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h | |
index 8b90cefbe0..a806cc6d0e 100644 | |
--- a/src/bin/pg_upgrade/pg_upgrade.h | |
+++ b/src/bin/pg_upgrade/pg_upgrade.h | |
@@ -218,7 +218,7 @@ typedef struct | |
uint32 large_object; | |
bool date_is_int; | |
bool float8_pass_by_value; | |
- bool data_checksum_version; | |
+ uint32 data_checksum_version; | |
} ControlData; | |
/* | |
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h | |
index 347a38f57c..3cffe4f828 100644 | |
--- a/src/include/access/xlog.h | |
+++ b/src/include/access/xlog.h | |
@@ -199,7 +199,7 @@ extern PGDLLIMPORT int wal_level; | |
* of the bits make it to disk, but the checksum wouldn't match. Also WAL-log | |
* them if forced by wal_log_hints=on. | |
*/ | |
-#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints) | |
+#define XLogHintBitIsNeeded() (DataChecksumsNeedWrite() || wal_log_hints) | |
/* Do we need to WAL-log information required only for Hot Standby and logical replication? */ | |
#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) | |
@@ -315,7 +315,18 @@ extern TimestampTz GetCurrentChunkReplayStartTime(void); | |
extern void UpdateControlFile(void); | |
extern uint64 GetSystemIdentifier(void); | |
extern char *GetMockAuthenticationNonce(void); | |
-extern bool DataChecksumsEnabled(void); | |
+extern bool DataChecksumsNeedWrite(void); | |
+extern bool DataChecksumsNeedVerify(void); | |
+extern bool DataChecksumsOnInProgress(void); | |
+extern void SetDataChecksumsOnInProgress(void); | |
+extern void SetDataChecksumsOn(void); | |
+extern void SetDataChecksumsOff(void); | |
+extern void AbsorbChecksumsOnInProgressBarrier(void); | |
+extern void AbsorbChecksumsOffInProgressBarrier(void); | |
+extern void AbsorbChecksumsOnBarrier(void); | |
+extern void AbsorbChecksumsOffBarrier(void); | |
+extern const char *show_data_checksums(void); | |
+extern void InitLocalControldata(void); | |
extern XLogRecPtr GetFakeLSNForUnloggedRel(void); | |
extern Size XLOGShmemSize(void); | |
extern void XLOGShmemInit(void); | |
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h | |
index c8869d5226..b180ca7b0f 100644 | |
--- a/src/include/access/xlog_internal.h | |
+++ b/src/include/access/xlog_internal.h | |
@@ -25,6 +25,7 @@ | |
#include "lib/stringinfo.h" | |
#include "pgtime.h" | |
#include "storage/block.h" | |
+#include "storage/checksum.h" | |
#include "storage/relfilenode.h" | |
@@ -245,6 +246,12 @@ typedef struct xl_restore_point | |
char rp_name[MAXFNAMELEN]; | |
} xl_restore_point; | |
+/* Information logged when checksum level is changed */ | |
+typedef struct xl_checksum_state | |
+{ | |
+ ChecksumType new_checksumtype; | |
+} xl_checksum_state; | |
+ | |
/* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */ | |
typedef struct xl_end_of_recovery | |
{ | |
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h | |
index 78b33b2a7f..1b8b291d2b 100644 | |
--- a/src/include/catalog/pg_class.h | |
+++ b/src/include/catalog/pg_class.h | |
@@ -119,6 +119,9 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat | |
/* is relation a partition? */ | |
bool relispartition BKI_DEFAULT(f); | |
+ /* does the relation have checksums enabled */ | |
+ bool relhaschecksums BKI_DEFAULT(f); | |
+ | |
/* heap for rewrite during DDL, link to original rel */ | |
Oid relrewrite BKI_DEFAULT(0); | |
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h | |
index de5670e538..73a5495335 100644 | |
--- a/src/include/catalog/pg_control.h | |
+++ b/src/include/catalog/pg_control.h | |
@@ -76,6 +76,7 @@ typedef struct CheckPoint | |
#define XLOG_END_OF_RECOVERY 0x90 | |
#define XLOG_FPI_FOR_HINT 0xA0 | |
#define XLOG_FPI 0xB0 | |
+#define XLOG_CHECKSUMS 0xC0 | |
/* | |
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat | |
index 61f2c2f5b4..287f618197 100644 | |
--- a/src/include/catalog/pg_proc.dat | |
+++ b/src/include/catalog/pg_proc.dat | |
@@ -10862,6 +10862,22 @@ | |
proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}', | |
prosrc => 'pg_control_init' }, | |
+{ oid => '4142', | |
+ descr => 'disable data checksums', | |
+ proname => 'pg_disable_data_checksums', provolatile => 'v', prorettype => 'bool', | |
+ proparallel => 'r', | |
+ proargtypes => '', | |
+ prosrc => 'disable_data_checksums' }, | |
+ | |
+{ oid => '4035', | |
+ descr => 'enable data checksums', | |
+ proname => 'pg_enable_data_checksums', provolatile => 'v', prorettype => 'void', | |
+ proparallel => 'r', | |
+ proargtypes => 'int4 int4', proallargtypes => '{int4,int4}', | |
+ proargmodes => '{i,i}', | |
+ proargnames => '{cost_delay,cost_limit}', | |
+ prosrc => 'enable_data_checksums' }, | |
+ | |
# collation management functions | |
{ oid => '3445', descr => 'import collations from operating system', | |
proname => 'pg_import_system_collations', procost => '100', | |
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h | |
index 18bc8a7b90..41d2082b29 100644 | |
--- a/src/include/miscadmin.h | |
+++ b/src/include/miscadmin.h | |
@@ -322,6 +322,8 @@ typedef enum BackendType | |
B_ARCHIVER, | |
B_STATS_COLLECTOR, | |
B_LOGGER, | |
+ B_DATACHECKSUMSWORKER_LAUNCHER, | |
+ B_DATACHECKSUMSWORKER_WORKER, | |
} BackendType; | |
extern BackendType MyBackendType; | |
diff --git a/src/include/pgstat.h b/src/include/pgstat.h | |
index 1387201382..701bde851b 100644 | |
--- a/src/include/pgstat.h | |
+++ b/src/include/pgstat.h | |
@@ -852,6 +852,8 @@ typedef enum | |
WAIT_EVENT_BTREE_PAGE, | |
WAIT_EVENT_CHECKPOINT_DONE, | |
WAIT_EVENT_CHECKPOINT_START, | |
+ WAIT_EVENT_CHECKSUM_ENABLE_STARTCONDITION, | |
+ WAIT_EVENT_CHECKSUM_ENABLE_FINISHCONDITION, | |
WAIT_EVENT_EXECUTE_GATHER, | |
WAIT_EVENT_HASH_BATCH_ALLOCATE, | |
WAIT_EVENT_HASH_BATCH_ELECT, | |
diff --git a/src/include/postmaster/datachecksumsworker.h b/src/include/postmaster/datachecksumsworker.h | |
new file mode 100644 | |
index 0000000000..62aeabf9f6 | |
--- /dev/null | |
+++ b/src/include/postmaster/datachecksumsworker.h | |
@@ -0,0 +1,42 @@ | |
+/*------------------------------------------------------------------------- | |
+ * | |
+ * datachecksumsworker.h | |
+ * header file for checksum helper background worker | |
+ * | |
+ * | |
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group | |
+ * Portions Copyright (c) 1994, Regents of the University of California | |
+ * | |
+ * src/include/postmaster/datachecksumsworker.h | |
+ * | |
+ *------------------------------------------------------------------------- | |
+ */ | |
+#ifndef DATACHECKSUMSWORKER_H | |
+#define DATACHECKSUMSWORKER_H | |
+ | |
+typedef enum DataChecksumOperation | |
+{ | |
+ ENABLE_CHECKSUMS = 0, | |
+ RESET_STATE | |
+} DataChecksumOperation; | |
+ | |
+/* Shared memory */ | |
+extern Size DatachecksumsWorkerShmemSize(void); | |
+extern void DatachecksumsWorkerShmemInit(void); | |
+ | |
+/* Status functions */ | |
+bool DataChecksumWorkerStarted(void); | |
+ | |
+/* Start the background processes for enabling checksums */ | |
+void StartDatachecksumsWorkerLauncher(DataChecksumOperation op, | |
+ int cost_delay, int cost_limit); | |
+ | |
+/* Shutdown the background processes, if any */ | |
+void ShutdownDatachecksumsWorkerIfRunning(void); | |
+ | |
+/* Background worker entrypoints */ | |
+void DatachecksumsWorkerLauncherMain(Datum arg); | |
+void DatachecksumsWorkerMain(Datum arg); | |
+void ResetDataChecksumStateInDatabase(Datum arg); | |
+ | |
+#endif /* DATACHECKSUMSWORKER_H */ | |
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h | |
index 3f88683a05..7f2dbbf630 100644 | |
--- a/src/include/storage/bufpage.h | |
+++ b/src/include/storage/bufpage.h | |
@@ -198,6 +198,8 @@ typedef PageHeaderData *PageHeader; | |
*/ | |
#define PG_PAGE_LAYOUT_VERSION 4 | |
#define PG_DATA_CHECKSUM_VERSION 1 | |
+#define PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION 2 | |
+ | |
/* ---------------------------------------------------------------- | |
* page support macros | |
diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h | |
index 6e77744cbc..f6ae955f58 100644 | |
--- a/src/include/storage/checksum.h | |
+++ b/src/include/storage/checksum.h | |
@@ -15,6 +15,14 @@ | |
#include "storage/block.h" | |
+typedef enum ChecksumType | |
+{ | |
+ DATA_CHECKSUMS_OFF = 0, | |
+ DATA_CHECKSUMS_ON, | |
+ DATA_CHECKSUMS_INPROGRESS_ON, | |
+ DATA_CHECKSUMS_INPROGRESS_OFF | |
+} ChecksumType; | |
+ | |
/* | |
* Compute the checksum for a Postgres page. The page must be aligned on a | |
* 4-byte boundary. | |
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h | |
index 5cb39697f3..05f85861e3 100644 | |
--- a/src/include/storage/procsignal.h | |
+++ b/src/include/storage/procsignal.h | |
@@ -48,12 +48,9 @@ typedef enum | |
typedef enum | |
{ | |
- /* | |
- * XXX. PROCSIGNAL_BARRIER_PLACEHOLDER should be replaced when the first | |
- * real user of the ProcSignalBarrier mechanism is added. It's just here | |
- * for now because we can't have an empty enum. | |
- */ | |
- PROCSIGNAL_BARRIER_PLACEHOLDER = 0 | |
+ PROCSIGNAL_BARRIER_CHECKSUM_OFF = 0, | |
+ PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON, | |
+ PROCSIGNAL_BARRIER_CHECKSUM_ON | |
} ProcSignalBarrierType; | |
/* | |
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h | |
index 0b5957ba02..ab30511d84 100644 | |
--- a/src/include/utils/rel.h | |
+++ b/src/include/utils/rel.h | |
@@ -611,6 +611,13 @@ typedef struct ViewOptions | |
*/ | |
#define RelationIsPopulated(relation) ((relation)->rd_rel->relispopulated) | |
+/* | |
+ * RelationHasDataChecksums | |
+ * True if all data pages of the relation have data checksums. | |
+ */ | |
+#define RelationHasDataChecksums(relation) \ | |
+	((relation)->rd_rel->relhaschecksums) | |
+ | |
/* | |
* RelationIsAccessibleInLogicalDecoding | |
* True if we need to log enough information to have access via | |
diff --git a/src/test/Makefile b/src/test/Makefile | |
index efb206aa75..6469ac94a4 100644 | |
--- a/src/test/Makefile | |
+++ b/src/test/Makefile | |
@@ -12,7 +12,8 @@ subdir = src/test | |
top_builddir = ../.. | |
include $(top_builddir)/src/Makefile.global | |
-SUBDIRS = perl regress isolation modules authentication recovery subscription | |
+SUBDIRS = perl regress isolation modules authentication recovery subscription \ | |
+ checksum | |
# Test suites that are not safe by default but can be run if selected | |
# by the user via the whitespace-separated list in variable | |
diff --git a/src/test/checksum/.gitignore b/src/test/checksum/.gitignore | |
new file mode 100644 | |
index 0000000000..871e943d50 | |
--- /dev/null | |
+++ b/src/test/checksum/.gitignore | |
@@ -0,0 +1,2 @@ | |
+# Generated by test suite | |
+/tmp_check/ | |
diff --git a/src/test/checksum/Makefile b/src/test/checksum/Makefile | |
new file mode 100644 | |
index 0000000000..558a8135f1 | |
--- /dev/null | |
+++ b/src/test/checksum/Makefile | |
@@ -0,0 +1,23 @@ | |
+#------------------------------------------------------------------------- | |
+# | |
+# Makefile for src/test/checksum | |
+# | |
+# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group | |
+# Portions Copyright (c) 1994, Regents of the University of California | |
+# | |
+# src/test/checksum/Makefile | |
+# | |
+#------------------------------------------------------------------------- | |
+ | |
+subdir = src/test/checksum | |
+top_builddir = ../../.. | |
+include $(top_builddir)/src/Makefile.global | |
+ | |
+check: | |
+ $(prove_check) | |
+ | |
+installcheck: | |
+ $(prove_installcheck) | |
+ | |
+clean distclean maintainer-clean: | |
+ rm -rf tmp_check | |
diff --git a/src/test/checksum/README b/src/test/checksum/README | |
new file mode 100644 | |
index 0000000000..0f0317060b | |
--- /dev/null | |
+++ b/src/test/checksum/README | |
@@ -0,0 +1,22 @@ | |
+src/test/checksum/README | |
+ | |
+Regression tests for data checksums | |
+=================================== | |
+ | |
+This directory contains a test suite for enabling data checksums | |
+in a running cluster. | |
+ | |
+Running the tests | |
+================= | |
+ | |
+ make check | |
+ | |
+or | |
+ | |
+ make installcheck | |
+ | |
+NOTE: This creates a temporary installation (in the case of "check"), | |
+with multiple nodes, be they primary or standby(s), for the purpose of | |
+the tests. | |
+ | |
+NOTE: This requires the --enable-tap-tests argument to configure. | |
diff --git a/src/test/checksum/t/001_basic.pl b/src/test/checksum/t/001_basic.pl | |
new file mode 100644 | |
index 0000000000..9dbb660937 | |
--- /dev/null | |
+++ b/src/test/checksum/t/001_basic.pl | |
@@ -0,0 +1,86 @@ | |
+# Test suite for testing enabling data checksums in an online cluster | |
+use strict; | |
+use warnings; | |
+use PostgresNode; | |
+use TestLib; | |
+use Test::More tests => 10; | |
+ | |
+# Initialize node with checksums disabled. | |
+my $node = get_new_node('main'); | |
+$node->init(); | |
+$node->start(); | |
+ | |
+# Create some content to have un-checksummed data in the cluster | |
+$node->safe_psql('postgres', | |
+ "CREATE TABLE t AS SELECT generate_series(1,10000) AS a;"); | |
+ | |
+# Ensure that checksums are turned off | |
+my $result = $node->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, 'off', 'ensure checksums are disabled'); | |
+ | |
+# No relation in pg_class should have relhaschecksums at this point | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT count(*) FROM pg_catalog.pg_class WHERE relhaschecksums;"); | |
+is($result, '0', 'ensure no entries in pg_class has checksums recorded'); | |
+ | |
+# Enable data checksums | |
+$node->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+ | |
+# Wait for checksums to become enabled | |
+$result = $node->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'on'); | |
+is($result, 1, 'ensure checksums are enabled'); | |
+ | |
+# Check that relations with storage have been marked with relhaschecksums in | |
+# pg_class | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT count(*) FROM pg_catalog.pg_class WHERE NOT relhaschecksums " . | |
+ "AND relkind IN ('r', 'i', 'S', 't', 'm');"); | |
+is($result, '0', 'ensure all relations are correctly flagged in the catalog'); | |
+ | |
+# Run a dummy query just to make sure we read back some data | |
+$result = $node->safe_psql('postgres', "SELECT count(*) FROM t"); | |
+is($result, '10000', 'ensure checksummed pages can be read back'); | |
+ | |
+# Enable data checksums again which should be a no-op.. | |
+$node->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+# ..and make sure we still can process data fine | |
+$node->safe_psql('postgres', "UPDATE t SET a = a + 1;"); | |
+$result = $node->safe_psql('postgres', "SELECT count(*) FROM t"); | |
+is($result, '10000', 'ensure checksummed pages can be read back'); | |
+ | |
+# Disable checksums again | |
+$node->safe_psql('postgres', "SELECT pg_disable_data_checksums();"); | |
+ | |
+# Wait for checksums to be disabled | |
+$result = $node->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'off'); | |
+is($result, 1, 'ensure checksums are disabled'); | |
+ | |
+# Test reading again | |
+$result = $node->safe_psql('postgres', "SELECT count(*) FROM t"); | |
+is($result, '10000', 'ensure checksummed pages can be read back'); | |
+ | |
+# Disable checksums when already disabled, which is also a no-op so we mainly | |
+# want to run this to make sure the backend isn't crashing or erroring out | |
+$node->safe_psql('postgres', "SELECT pg_disable_data_checksums();"); | |
+ | |
+# Re-enable checksums and make sure that the relhaschecksums flags in the | |
+# catalog aren't tricking processing into skipping previously checksummed | |
+# relations | |
+$node->safe_psql('postgres', "UPDATE t SET a = a + 1;"); | |
+ | |
+$node->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+$result = $node->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'on'); | |
+is($result, 1, 'ensure checksums are enabled'); | |
+ | |
+# Run a dummy query just to make sure we read back some data | |
+$result = $node->safe_psql('postgres', "SELECT count(*) FROM t"); | |
+is($result, '10000', 'ensure checksummed pages can be read back'); | |
+ | |
+$node->stop; | |
diff --git a/src/test/checksum/t/002_restarts.pl b/src/test/checksum/t/002_restarts.pl | |
new file mode 100644 | |
index 0000000000..d908b95561 | |
--- /dev/null | |
+++ b/src/test/checksum/t/002_restarts.pl | |
@@ -0,0 +1,97 @@ | |
+# Test suite for testing enabling data checksums in an online cluster with | |
+# restarting the processing | |
+use strict; | |
+use warnings; | |
+use PostgresNode; | |
+use TestLib; | |
+use Test::More; | |
+use IPC::Run qw(pump finish timer); | |
+ | |
+# If we don't have IO::Pty, forget it, because IPC::Run depends on that | |
+# to support pty connections | |
+eval { require IO::Pty; }; | |
+if ($@) | |
+{ | |
+ plan skip_all => 'IO::Pty is needed to run this test'; | |
+} | |
+ | |
+# Initialize node with checksums disabled. | |
+my $node = get_new_node('main'); | |
+$node->init(); | |
+$node->start(); | |
+ | |
+# Create some content to have un-checksummed data in the cluster | |
+$node->safe_psql('postgres', | |
+ "CREATE TABLE t AS SELECT generate_series(1,10000) AS a;"); | |
+ | |
+# Ensure that checksums are disabled | |
+my $result = $node->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, 'off', 'ensure checksums are disabled'); | |
+ | |
+# Create a barrier for checksumming to block on, in this case a pre-existing | |
+# temporary table which is kept open while processing is started. We can | |
+# accomplish this by setting up an interactive psql process which keeps the | |
+# temporary table created as we enable checksums in another psql process. | |
+my $in = ''; | |
+my $out = ''; | |
+my $timer = timer(5); | |
+ | |
+my $h = $node->interactive_psql('postgres', \$in, \$out, $timer); | |
+ | |
+$out = ''; | |
+$timer->start(5); | |
+ | |
+$in .= "CREATE TEMPORARY TABLE tt (a integer);\n"; | |
+pump $h until ($out =~ /CREATE TABLE/ || $timer->is_expired); | |
+ | |
+# In another session, make sure we can see the blocking temp table but start | |
+# processing anyways and check that we are blocked with a proper wait event. | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT relpersistence FROM pg_catalog.pg_class WHERE relname = 'tt';"); | |
+is($result, 't', 'ensure we can see the temporary table'); | |
+ | |
+$node->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+ | |
+$result = $node->poll_query_until('postgres', | |
+ "SELECT count(*) FROM pg_catalog.pg_class WHERE NOT relhaschecksums " . | |
+ "AND relkind IN ('r', 'i', 'S', 't', 'm');", | |
+ '1'); | |
+is($result, 1, 'ensure there is a single table left'); | |
+ | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, 'inprogress', "ensure checksums aren't enabled yet"); | |
+ | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT wait_event FROM pg_stat_activity WHERE backend_type = 'datachecksumsworker worker';"); | |
+is($result, 'ChecksumEnableFinishCondition', 'test for correct wait event'); | |
+ | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT count(*) FROM pg_catalog.pg_class WHERE NOT relhaschecksums " . | |
+ "AND relkind IN ('r', 'i', 'S', 't', 'm');"); | |
+is($result, '1', 'doublecheck that there is a single table left before restarting'); | |
+ | |
+$node->stop; | |
+$node->start; | |
+ | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, 'inprogress', "ensure checksums aren't enabled yet"); | |
+ | |
+$result = $node->safe_psql('postgres', | |
+ "SELECT count(*) FROM pg_catalog.pg_class WHERE NOT relhaschecksums " . | |
+ "AND relkind IN ('r', 'i', 'S', 't', 'm');"); | |
+is($result, '0', 'no temporary tables this time around'); | |
+ | |
+$node->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+ | |
+$result = $node->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'on'); | |
+is($result, 1, 'ensure checksums are turned on'); | |
+ | |
+$result = $node->safe_psql('postgres', "SELECT count(*) FROM t"); | |
+is($result, '10000', 'ensure checksummed pages can be read back'); | |
+ | |
+done_testing(); | |
diff --git a/src/test/checksum/t/003_standby_checksum.pl b/src/test/checksum/t/003_standby_checksum.pl | |
new file mode 100644 | |
index 0000000000..b276027453 | |
--- /dev/null | |
+++ b/src/test/checksum/t/003_standby_checksum.pl | |
@@ -0,0 +1,96 @@ | |
+# Test suite for testing enabling data checksums in an online cluster with | |
+# streaming replication | |
+use strict; | |
+use warnings; | |
+use PostgresNode; | |
+use TestLib; | |
+use Test::More tests => 10; | |
+ | |
+# Initialize primary node | |
+my $node_primary = get_new_node('primary'); | |
+$node_primary->init(allows_streaming => 1); | |
+$node_primary->start; | |
+my $backup_name = 'my_backup'; | |
+ | |
+# Take backup | |
+$node_primary->backup($backup_name); | |
+ | |
+# Create streaming standby linking to primary | |
+my $node_standby_1 = get_new_node('standby_1'); | |
+$node_standby_1->init_from_backup($node_primary, $backup_name, | |
+ has_streaming => 1); | |
+$node_standby_1->start; | |
+ | |
+# Create some content on primary to have un-checksummed data in the cluster | |
+$node_primary->safe_psql('postgres', | |
+ "CREATE TABLE t AS SELECT generate_series(1,10000) AS a;"); | |
+ | |
+# Wait for standbys to catch up | |
+$node_primary->wait_for_catchup($node_standby_1, 'replay', | |
+ $node_primary->lsn('insert')); | |
+ | |
+# Check that checksums are turned off | |
+my $result = $node_primary->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, "off", 'ensure checksums are turned off on primary'); | |
+ | |
+$result = $node_standby_1->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, "off", 'ensure checksums are turned off on standby_1'); | |
+ | |
+# Enable checksums for the cluster | |
+$node_primary->safe_psql('postgres', "SELECT pg_enable_data_checksums();"); | |
+ | |
+# Ensure that the primary switches to inprogress | |
+$result = $node_primary->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ "inprogress"); | |
+is($result, 1, 'ensure checksums are in progress on primary'); | |
+ | |
+# Wait for checksum enable to be replayed | |
+$node_primary->wait_for_catchup($node_standby_1, 'replay'); | |
+ | |
+# Ensure that the standby has switched to inprogress or on | |
+# Normally it would be "inprogress", but it is theoretically possible for the primary | |
+# to complete the checksum enabling *and* have the standby replay that record before | |
+# we reach the check below. | |
+$result = $node_standby_1->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+cmp_ok($result, '~~', ["inprogress", "on"], 'ensure checksums are on or in progress on standby_1'); | |
+ | |
+# Insert some more data which should be checksummed on INSERT | |
+$node_primary->safe_psql('postgres', | |
+ "INSERT INTO t VALUES (generate_series(1,10000));"); | |
+ | |
+# Wait for checksums enabled on the primary | |
+$result = $node_primary->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'on'); | |
+is($result, 1, 'ensure checksums are enabled on the primary'); | |
+ | |
+# Wait for checksums enabled on the standby | |
+$result = $node_standby_1->poll_query_until('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", | |
+ 'on'); | |
+is($result, 1, 'ensure checksums are enabled on the standby'); | |
+ | |
+$result = $node_primary->safe_psql('postgres', "SELECT count(a) FROM t"); | |
+is ($result, '20000', 'ensure we can safely read all data with checksums'); | |
+ | |
+# Disable checksums and ensure it's propagated to standby and that we can | |
+# still read all data | |
+$node_primary->safe_psql('postgres', "SELECT pg_disable_data_checksums();"); | |
+$result = $node_primary->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, 'off', 'ensure data checksums are disabled on the primary'); | |
+ | |
+# Wait for checksum disable to be replayed | |
+$node_primary->wait_for_catchup($node_standby_1, 'replay'); | |
+ | |
+# Ensure that the standby has switched to off | |
+$result = $node_standby_1->safe_psql('postgres', | |
+ "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"); | |
+is($result, "off", 'ensure checksums are off on standby_1'); | |
+ | |
+$result = $node_primary->safe_psql('postgres', "SELECT count(a) FROM t"); | |
+is ($result, "20000", 'ensure we can safely read all data without checksums'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment