pgsql-4137

Version:

8.3.1

Links:

Bug report: http://archives.postgresql.org/pgsql-bugs/2008-05/msg00002.php

Patch: http://archives.postgresql.org/pgsql-bugs/2008-05/msg00004.php

Symptom(Failure):

The PostgreSQL standby server deleted a Write-Ahead Logging (WAL) file that it still needed. As a result, the next start-up of the standby server failed with the following log messages:

could not open file "pg_xlog/0000000100000001000000D9" (log file 1, segment 217): No such file or directory
invalid checkpoint record
could not locate required checkpoint record
If you are not restoring from a backup, try removing the file "/var/lib/pgsql/data/backup_label".
startup process (PID 19201) was terminated by signal 6: Aborted
aborting startup due to startup process failure

How it is diagnosed:

Source analysis.

Root Cause:

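The standby server deleted a WAL file too early: the segment holding the checkpoint record required at start-up had already been removed, so the record could not be read and start-up aborted.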

src/backend/access/transam/xlog.c

/* Open WAL log file. */

static int XLogFileOpen(uint32 log, uint32 seg) {

   2277     char        path[MAXPGPATH];

   2278     int         fd;

   2279

   2280     XLogFilePath(path, ThisTimeLineID, log, seg);

   2281

   2282     fd = open (path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,

   2283                        S_IRUSR | S_IWUSR);

   2284     if (fd < 0)

   2285         ereport(PANIC,

   2286                 (errcode_for_file_access(),

   2287            errmsg("could not open file \"%s\" (log file %u, segment %u): %m",

   2288                   path, log, seg)));

   2289

   2290     return fd;

   2291 }

 /*

  * This must be called ONCE during postmaster or standalone-backend startup

  */

void StartupXLOG(void) {

       … ...

   4841         record = ReadCheckpointRecord(checkPointLoc, 0);

   4842         if (record != NULL)

   4843         {

   4844             ereport(DEBUG1,

   4845                     (errmsg("checkpoint record is at %X/%X",

   4846                             checkPointLoc.xlogid, checkPointLoc.xrecoff)));

   4847             InRecovery = true;  /* force recovery even if SHUTDOWNED */

   4848         }

   4849         else

   4850         {

   4851             ereport(PANIC,

   4852                     (errmsg("could not locate required checkpoint record"),

   4853                      errhint("If you are not restoring from a backup, try removing the file \"%s/backup_label\".", DataDir)));

   4854         }

}

/*

 * Subroutine to try to fetch and validate a prior checkpoint record.

*

* whichChkpt identifies the checkpoint (merely for reporting purposes).

* 1 for "primary", 2 for "secondary", 0 for "other" (backup_label)

 */

static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt){

   5330     XLogRecord *record;

   5331

 

   5352     record = ReadRecord(&RecPtr, LOG);

   5353

   5354     if (record == NULL)

   5355     {

   5356         switch (whichChkpt)

   5357         {

   5358             case 1:

   5359                 ereport(LOG,

   5360                         (errmsg("invalid primary checkpoint record")));

   5361                 break;

   5362             case 2:

   5363                 ereport(LOG,

   5364                         (errmsg("invalid secondary checkpoint record")));

   5365                 break;

   5366             default:

   5367                 ereport(LOG,

   5368                         (errmsg("invalid checkpoint record")));

   5369                 break;

   5370         }

   5371         return NULL;

   5372     }

Is there any log message?

Yes.

How can we automatically insert a log message?

By checking the return value of the system call (open) and logging when it indicates failure.