Last active
August 29, 2015 14:18
-
-
Save volkanunsal/bb58fa52077ae3288dfd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Writing recovery.conf file | |
Creating the staging directory to hold the master's log files | |
DEBUG: 00000: invoking IpcMemoryCreate(size=14647296) | |
LOCATION: CreateSharedMemoryAndSemaphores, ipci.c:142 | |
DEBUG: 00000: SlruScanDirectory invoking callback on pg_notify/0000 | |
LOCATION: SlruScanDirectory, slru.c:1307 | |
DEBUG: 00000: removing file "pg_notify/0000" | |
LOCATION: SlruDeleteSegment, slru.c:1220 | |
DEBUG: 00000: InitPostgres | |
LOCATION: InitPostgres, postinit.c:506 | |
DEBUG: 00000: my backend ID is 1 | |
LOCATION: SharedInvalBackendInit, sinvaladt.c:330 | |
LOG: 00000: database system was interrupted; last known up at 2015-04-10 21:01:22 GMT | |
LOCATION: StartupXLOG, xlog.c:4935 | |
DEBUG: 00000: standby_mode = 'on' | |
LOCATION: readRecoveryCommandFile, xlog.c:4320 | |
DEBUG: 00000: primary_conninfo = 'host=104.131.182.80' | |
LOCATION: readRecoveryCommandFile, xlog.c:4327 | |
LOG: 00000: entering standby mode | |
LOCATION: StartupXLOG, xlog.c:4988 | |
DEBUG: 00000: checkpoint record is at 0/12000060 | |
LOCATION: StartupXLOG, xlog.c:5046 | |
DEBUG: 00000: redo record is at 0/12000028; shutdown FALSE | |
LOCATION: StartupXLOG, xlog.c:5190 | |
DEBUG: 00000: next transaction ID: 0/695; next OID: 12914 | |
LOCATION: StartupXLOG, xlog.c:5194 | |
DEBUG: 00000: next MultiXactId: 1; next MultiXactOffset: 0 | |
LOCATION: StartupXLOG, xlog.c:5197 | |
DEBUG: 00000: oldest unfrozen transaction ID: 674, in database 1 | |
LOCATION: StartupXLOG, xlog.c:5200 | |
DEBUG: 00000: oldest MultiXactId: 1, in database 1 | |
LOCATION: StartupXLOG, xlog.c:5203 | |
DEBUG: 00000: transaction ID wrap limit is 2147484321, limited by database with OID 1 | |
LOCATION: SetTransactionIdLimit, varsup.c:337 | |
DEBUG: 00000: MultiXactId wrap limit is 2147483648, limited by database with OID 1 | |
LOCATION: SetMultiXactIdLimit, multixact.c:2167 | |
DEBUG: 00000: resetting unlogged relations: cleanup 1 init 0 | |
LOCATION: ResetUnloggedRelations, reinit.c:60 | |
DEBUG: 00000: initializing for hot standby | |
LOCATION: StartupXLOG, xlog.c:5420 | |
DEBUG: 00000: my backend ID is 2 | |
LOCATION: SharedInvalBackendInit, sinvaladt.c:330 | |
LOG: 00000: redo starts at 0/12000028 | |
LOCATION: StartupXLOG, xlog.c:5555 | |
DEBUG: 00000: prune KnownAssignedXids to 695 | |
CONTEXT: xlog redo running xacts: nextXid 695 latestCompletedXid 694 oldestRunningXid 695 | |
LOCATION: KnownAssignedXidsRemovePreceding, procarray.c:3233 | |
DEBUG: 00000: 0 KnownAssignedXids (num=0 tail=0 head=0) | |
CONTEXT: xlog redo running xacts: nextXid 695 latestCompletedXid 694 oldestRunningXid 695 | |
LOCATION: KnownAssignedXidsDisplay, procarray.c:3435 | |
DEBUG: 00000: recovery snapshots are now enabled | |
CONTEXT: xlog redo running xacts: nextXid 695 latestCompletedXid 694 oldestRunningXid 695 | |
LOCATION: ProcArrayApplyRecoveryInfo, procarray.c:744 | |
DEBUG: 00000: end of backup reached | |
CONTEXT: xlog redo backup end: 0/12000028 | |
LOCATION: xlog_redo, xlog.c:8241 | |
LOG: 00000: consistent recovery state reached at 0/120000F0 | |
LOCATION: CheckRecoveryConsistency, xlog.c:6231 | |
/// The following repreats forever | |
DEBUG: 58P01: could not open file "pg_xlog/000000010000000000000013": No such file or directory | |
LOCATION: XLogFileReadAnyTLI, xlog.c:2768 | |
DEBUG: 00000: switched WAL source from archive to stream after failure | |
LOCATION: WaitForWALToBecomeAvailable, xlog.c:9803 | |
DEBUG: 00000: switched WAL source from stream to archive after failure |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Open the WAL segment containing WAL position 'RecPtr'. | |
* | |
* The segment can be fetched via restore_command, or via walreceiver having | |
* streamed the record, or it can already be present in pg_xlog. Checking | |
* pg_xlog is mainly for crash recovery, but it will be polled in standby mode | |
* too, in case someone copies a new segment directly to pg_xlog. That is not | |
* documented or recommended, though. | |
* | |
* If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should | |
* prepare to read WAL starting from RedoStartLSN after this. | |
* | |
* 'RecPtr' might not point to the beginning of the record we're interested | |
* in, it might also point to the page or segment header. In that case, | |
* 'tliRecPtr' is the position of the WAL record we're interested in. It is | |
* used to decide which timeline to stream the requested WAL from. | |
* | |
* If the the record is not immediately available, the function returns false | |
* if we're not in standby mode. In standby mode, waits for it to become | |
* available. | |
* | |
* When the requested record becomes available, the function opens the file | |
* containing it (if not open already), and returns true. When end of standby | |
* mode is triggered by the user, and there is no more WAL available, returns | |
* false. | |
*/ | |
static bool | |
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, | |
bool fetching_ckpt, XLogRecPtr tliRecPtr) | |
{ | |
static pg_time_t last_fail_time = 0; | |
pg_time_t now; | |
/*------- | |
* Standby mode is implemented by a state machine: | |
* | |
* 1. Read from either archive or pg_xlog (XLOG_FROM_ARCHIVE), or just | |
* pg_xlog (XLOG_FROM_XLOG) | |
* 2. Check trigger file | |
* 3. Read from primary server via walreceiver (XLOG_FROM_STREAM) | |
* 4. Rescan timelines | |
* 5. Sleep 5 seconds, and loop back to 1. | |
* | |
* Failure to read from the current source advances the state machine to | |
* the next state. | |
* | |
* 'currentSource' indicates the current state. There are no currentSource | |
* values for "check trigger", "rescan timelines", and "sleep" states, | |
* those actions are taken when reading from the previous source fails, as | |
* part of advancing to the next state. | |
*------- | |
*/ | |
if (!InArchiveRecovery) | |
currentSource = XLOG_FROM_PG_XLOG; | |
else if (currentSource == 0) | |
currentSource = XLOG_FROM_ARCHIVE; | |
for (;;) | |
{ | |
int oldSource = currentSource; | |
/* | |
* First check if we failed to read from the current source, and | |
* advance the state machine if so. The failure to read might've | |
* happened outside this function, e.g when a CRC check fails on a | |
* record, or within this loop. | |
*/ | |
if (lastSourceFailed) | |
{ | |
switch (currentSource) | |
{ | |
case XLOG_FROM_ARCHIVE: | |
case XLOG_FROM_PG_XLOG: | |
/* | |
* Check to see if the trigger file exists. Note that we | |
* do this only after failure, so when you create the | |
* trigger file, we still finish replaying as much as we | |
* can from archive and pg_xlog before failover. | |
*/ | |
if (StandbyMode && CheckForStandbyTrigger()) | |
{ | |
ShutdownWalRcv(); | |
return false; | |
} | |
/* | |
* Not in standby mode, and we've now tried the archive | |
* and pg_xlog. | |
*/ | |
if (!StandbyMode) | |
return false; | |
/* | |
* If primary_conninfo is set, launch walreceiver to try | |
* to stream the missing WAL. | |
* | |
* If fetching_ckpt is TRUE, RecPtr points to the initial | |
* checkpoint location. In that case, we use RedoStartLSN | |
* as the streaming start position instead of RecPtr, so | |
* that when we later jump backwards to start redo at | |
* RedoStartLSN, we will have the logs streamed already. | |
*/ | |
if (PrimaryConnInfo) | |
{ | |
XLogRecPtr ptr; | |
TimeLineID tli; | |
if (fetching_ckpt) | |
{ | |
ptr = RedoStartLSN; | |
tli = ControlFile->checkPointCopy.ThisTimeLineID; | |
} | |
else | |
{ | |
ptr = tliRecPtr; | |
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); | |
if (curFileTLI > 0 && tli < curFileTLI) | |
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", | |
(uint32) (ptr >> 32), (uint32) ptr, | |
tli, curFileTLI); | |
} | |
curFileTLI = tli; | |
RequestXLogStreaming(tli, ptr, PrimaryConnInfo); | |
receivedUpto = 0; | |
} | |
/* | |
* Move to XLOG_FROM_STREAM state in either case. We'll | |
* get immediate failure if we didn't launch walreceiver, | |
* and move on to the next state. | |
*/ | |
currentSource = XLOG_FROM_STREAM; | |
break; | |
case XLOG_FROM_STREAM: | |
/* | |
* Failure while streaming. Most likely, we got here | |
* because streaming replication was terminated, or | |
* promotion was triggered. But we also get here if we | |
* find an invalid record in the WAL streamed from master, | |
* in which case something is seriously wrong. There's | |
* little chance that the problem will just go away, but | |
* PANIC is not good for availability either, especially | |
* in hot standby mode. So, we treat that the same as | |
* disconnection, and retry from archive/pg_xlog again. | |
* The WAL in the archive should be identical to what was | |
* streamed, so it's unlikely that it helps, but one can | |
* hope... | |
*/ | |
/* | |
* Before we leave XLOG_FROM_STREAM state, make sure that | |
* walreceiver is not active, so that it won't overwrite | |
* WAL that we restore from archive. | |
*/ | |
if (WalRcvStreaming()) | |
ShutdownWalRcv(); | |
/* | |
* Before we sleep, re-scan for possible new timelines if | |
* we were requested to recover to the latest timeline. | |
*/ | |
if (recoveryTargetIsLatest) | |
{ | |
if (rescanLatestTimeLine()) | |
{ | |
currentSource = XLOG_FROM_ARCHIVE; | |
break; | |
} | |
} | |
/* | |
* XLOG_FROM_STREAM is the last state in our state | |
* machine, so we've exhausted all the options for | |
* obtaining the requested WAL. We're going to loop back | |
* and retry from the archive, but if it hasn't been long | |
* since last attempt, sleep 5 seconds to avoid | |
* busy-waiting. | |
*/ | |
now = (pg_time_t) time(NULL); | |
if ((now - last_fail_time) < 5) | |
{ | |
pg_usleep(1000000L * (5 - (now - last_fail_time))); | |
now = (pg_time_t) time(NULL); | |
} | |
last_fail_time = now; | |
currentSource = XLOG_FROM_ARCHIVE; | |
break; | |
default: | |
elog(ERROR, "unexpected WAL source %d", currentSource); | |
} | |
} | |
else if (currentSource == XLOG_FROM_PG_XLOG) | |
{ | |
/* | |
* We just successfully read a file in pg_xlog. We prefer files in | |
* the archive over ones in pg_xlog, so try the next file again | |
* from the archive first. | |
*/ | |
if (InArchiveRecovery) | |
currentSource = XLOG_FROM_ARCHIVE; | |
} | |
if (currentSource != oldSource) | |
elog(DEBUG2, "switched WAL source from %s to %s after %s", | |
xlogSourceNames[oldSource], xlogSourceNames[currentSource], | |
lastSourceFailed ? "failure" : "success"); | |
/* | |
* We've now handled possible failure. Try to read from the chosen | |
* source. | |
*/ | |
lastSourceFailed = false; | |
switch (currentSource) | |
{ | |
case XLOG_FROM_ARCHIVE: | |
case XLOG_FROM_PG_XLOG: | |
/* Close any old file we might have open. */ | |
if (readFile >= 0) | |
{ | |
close(readFile); | |
readFile = -1; | |
} | |
/* Reset curFileTLI if random fetch. */ | |
if (randAccess) | |
curFileTLI = 0; | |
/* | |
* Try to restore the file from archive, or read an existing | |
* file from pg_xlog. | |
*/ | |
readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2, | |
currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY : | |
currentSource); | |
if (readFile >= 0) | |
return true; /* success! */ | |
/* | |
* Nope, not found in archive or pg_xlog. | |
*/ | |
lastSourceFailed = true; | |
break; | |
case XLOG_FROM_STREAM: | |
{ | |
bool havedata; | |
/* | |
* Check if WAL receiver is still active. | |
*/ | |
if (!WalRcvStreaming()) | |
{ | |
lastSourceFailed = true; | |
break; | |
} | |
/* | |
* Walreceiver is active, so see if new data has arrived. | |
* | |
* We only advance XLogReceiptTime when we obtain fresh | |
* WAL from walreceiver and observe that we had already | |
* processed everything before the most recent "chunk" | |
* that it flushed to disk. In steady state where we are | |
* keeping up with the incoming data, XLogReceiptTime will | |
* be updated on each cycle. When we are behind, | |
* XLogReceiptTime will not advance, so the grace time | |
* allotted to conflicting queries will decrease. | |
*/ | |
if (RecPtr < receivedUpto) | |
havedata = true; | |
else | |
{ | |
XLogRecPtr latestChunkStart; | |
receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart, &receiveTLI); | |
if (RecPtr < receivedUpto && receiveTLI == curFileTLI) | |
{ | |
havedata = true; | |
if (latestChunkStart <= RecPtr) | |
{ | |
XLogReceiptTime = GetCurrentTimestamp(); | |
SetCurrentChunkStartTime(XLogReceiptTime); | |
} | |
} | |
else | |
havedata = false; | |
} | |
if (havedata) | |
{ | |
/* | |
* Great, streamed far enough. Open the file if it's | |
* not open already. Also read the timeline history | |
* file if we haven't initialized timeline history | |
* yet; it should be streamed over and present in | |
* pg_xlog by now. Use XLOG_FROM_STREAM so that | |
* source info is set correctly and XLogReceiptTime | |
* isn't changed. | |
*/ | |
if (readFile < 0) | |
{ | |
if (!expectedTLEs) | |
expectedTLEs = readTimeLineHistory(receiveTLI); | |
readFile = XLogFileRead(readSegNo, PANIC, | |
receiveTLI, | |
XLOG_FROM_STREAM, false); | |
Assert(readFile >= 0); | |
} | |
else | |
{ | |
/* just make sure source info is correct... */ | |
readSource = XLOG_FROM_STREAM; | |
XLogReceiptSource = XLOG_FROM_STREAM; | |
return true; | |
} | |
break; | |
} | |
/* | |
* Data not here yet. Check for trigger, then wait for | |
* walreceiver to wake us up when new WAL arrives. | |
*/ | |
if (CheckForStandbyTrigger()) | |
{ | |
/* | |
* Note that we don't "return false" immediately here. | |
* After being triggered, we still want to replay all | |
* the WAL that was already streamed. It's in pg_xlog | |
* now, so we just treat this as a failure, and the | |
* state machine will move on to replay the streamed | |
* WAL from pg_xlog, and then recheck the trigger and | |
* exit replay. | |
*/ | |
lastSourceFailed = true; | |
break; | |
} | |
/* | |
* Wait for more WAL to arrive. Time out after 5 seconds, | |
* like when polling the archive, to react to a trigger | |
* file promptly. | |
*/ | |
WaitLatch(&XLogCtl->recoveryWakeupLatch, | |
WL_LATCH_SET | WL_TIMEOUT, | |
5000L); | |
ResetLatch(&XLogCtl->recoveryWakeupLatch); | |
break; | |
} | |
default: | |
elog(ERROR, "unexpected WAL source %d", currentSource); | |
} | |
/* | |
* This possibly-long loop needs to handle interrupts of startup | |
* process. | |
*/ | |
HandleStartupProcInterrupts(); | |
} | |
return false; /* not reached */ | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment