pgsql-3969

Version:

8.2.6

Bug Link:

http://archives.postgresql.org/pgsql-bugs/2008-02/msg00170.php

Patch Link:

http://archives.postgresql.org/pgsql-committers/2008-02/msg00244.php

http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/bin/pg_ctl/pg_ctl.c?r1=1.92&r2=1.93;f=h

Symptom:

pg_ctl cannot detect server startup with some options

Failure type:

Hang

Is there any log message?

No

How it is diagnosed:

We reproduced the failure.

How to reproduce:

pg_ctl -w -o "-p 5432 -c max_connections=100" start

waits for server startup forever and timeout,

but server has successfully started actually.

However the following work as expected.

* pg_ctl -w -o "-p 5432" start

* pg_ctl -w -o "-c max_connections=100" start

* pg_ctl -w -o "-c max_connections=100 -p 5432" start

$ ./bin/pg_ctl -D data -w -o "-p 5432" start

LOG:  database system was interrupted at 2011-02-20 17:01:23 PST

LOG:  checkpoint record is at 0/43DE38

LOG:  redo record is at 0/43DE38; undo record is at 0/0; shutdown FALSE

LOG:  next transaction ID: 0/602; next OID: 24576

LOG:  next MultiXactId: 1; next MultiXactOffset: 0

LOG:  database system was not properly shut down; automatic recovery in progress

LOG:  record with zero length at 0/43DE80

LOG:  redo is not required

LOG:  database system is ready

waiting for server to start... done

server started

$ ./bin/pg_ctl -D data -w -o "-p 5432 -c max_connections=100" start

LOG:  database system was shut down at 2011-02-20 17:35:37 PST

LOG:  checkpoint record is at 0/43DEC8

LOG:  redo record is at 0/43DEC8; undo record is at 0/0; shutdown TRUE

LOG:  next transaction ID: 0/603; next OID: 24576

LOG:  next MultiXactId: 1; next MultiXactOffset: 0

LOG:  database system is ready

waiting for server to start....................

--

Background:

pg_ctl -- start, stop, or restart a PostgreSQL server

-w

Wait for the start or shutdown to complete. Times out after 60 seconds. This is the default for shutdowns.

-o options

Specifies options to be passed directly to the postgres command.

Root Cause:

Because of the definition of WHITESPACE characters in pg_ctl.c, the port str is extacted wrong from command line.

WHITESPACE is defined as folows:

#define WHITESPACE "\f\n\r\t\v"

In fact, WHITESPACE does not contain whilespace (0x20)

test_postmaster_connection(bool do_checkpoint)

{

        PGconn           *conn;

        bool                success = false;

        int                        i;

        char                portstr[32];

        char           *p;

        char                connstr[128]; /* Should be way more than enough! */

        *portstr = '\0';

        /* post_opts = "-p 5432 -c max_connections=100"*/

+        char           *q;

        for (p = post_opts; *p;)

        {

                /* advance past whitespace/quoting */

                while (isspace((unsigned char) *p) || *p == '\'' || *p == '"')

                        p++;

                if (strncmp(p, "-p", strlen("-p")) == 0)

                {

                        p += strlen("-p");

                        /* advance past whitespace/quoting */

                        while (isspace((unsigned char) *p) || *p == '\'' || *p == '"')

                                p++;

                        StrNCpy(portstr, p, Min(strcspn(p, "\"'" WHITESPACE) + 1,

                                                                        sizeof(portstr)));

                        //portstr = 5432 -c max_connections=100\000\373!\000..

+                        /* find end of value (not including any ending quote!) */
+                         q = p;
+                        while (*q &&
+                                    !(isspace((unsigned char) *q) || *q == '\'' || *q == '"'))
+                                q++;
+                        /* and save the argument value */
+                         strlcpy(portstr, p, Min((q - p) + 1, sizeof(portstr)));
+                         p = q;

                        /* keep looking, maybe there is another -p */

                }

                /* Advance to next whitespace */

                while (*p && !isspace((unsigned char) *p))

                        p++;

        }

        /* config file */

        if (!*portstr)

                ...

        /* environment */

        if (!*portstr && getenv("PGPORT") != NULL)

                StrNCpy(portstr, getenv("PGPORT"), sizeof(portstr));

        /* default */

        if (!*portstr)

                snprintf(portstr, sizeof(portstr), "%d", DEF_PGPORT);

        snprintf(connstr, sizeof(connstr), "dbname=postgres port=%s connect_timeout=5", portstr);

"connstr: dbname=postgres port=5432 -c max_connections=100 connect_timeout=5\000\267P\362\377\277fQ%\000\036\000\000\000\340D4\000p\362\377\277J[%\000\340D4\000\000\300ȷ\036\000\000\000\210\362\377\277\364?4\000\340D4\000\210\362\377\277\240\236$\000\340D4"!!

        for (i = 0; i < wait_seconds; i++)//wait_seconds = 60;

        {

                if ((conn = PQconnectdb(connstr)) != NULL &&

                        (PQstatus(conn) == CONNECTION_OK ||

                         (strcmp(PQerrorMessage(conn),

                                         PQnoPasswordSupplied) == 0))

)

                {

                        PQfinish(conn);

                        success = true;

                        break;

                }

                else

                {

                        PQfinish(conn);

                        print_msg(".");

                        pg_usleep(1000000); /* 1 sec */

                }

        }

        //so actually it’s not hang, but to the user, it hangs there...

        return success;

}

 

Another simple patch is:

-#define WHITESPACE "\f\n\r\t\v" /* as defined by isspace() */

+#define WHITESPACE " \f\n\r\t\v" /* as defined by isspace() */

Pattern to handle:

input check