httpd-48025

Version:

2.3.head (see source at

http://svn.apache.org/viewvc/httpd/mod_fcgid/trunk/modules/fcgid/fcgid_proc_unix.c?view=markup&pathrev=827834

)

Bug link:

https://issues.apache.org/bugzilla/show_bug.cgi?id=48025

How it is diagnosed (reproduced or source analysis)?

Source analysis (we did not reproduce this failure).

Symptom:

httpd mysteriously closed the connection to the remote fast cgi server when fast cgi server is still alive.

[Mon Oct 19 16:30:10 2009] [info] [client 127.0.0.1] (11)Resource temporarily
unavailable: mod_fcgid: error writing data, FastCGI server closed connection

Root cause:

fastcgi forgot to handle "EAGAIN" signal when writing to a non-blocking cgi pipe.-- wrong result

655 static apr_status_t socket_writev(...) {

   …

667  /* Try nonblock write */

668  do {

669       if ((retcode = writev(unix_socket, vec, nvec)) > 0) {

670            *writecnt = retcode;

671            return APR_SUCCESS;

672        }

673   } while (retcode == -1 && APR_STATUS_IS_EINTR(errno));

674   rv = errno;

675  

676   if (APR_STATUS_IS_EAGAIN(rv)) {

677       /* poll() */

678       pollfds[0].fd = unix_socket;

679       pollfds[0].events = POLLOUT;

680       do {

681             retcode = poll(pollfds, 1, ipc_handle->communation_timeout * 1000);

682        } while (retcode < 0 && APR_STATUS_IS_EINTR(errno));

683

684       if (retcode < 0) {

685              rv = errno;

686        }

687       else if (retcode == 0) {

688              rv = APR_TIMEUP;

689       }

690       else {

691              /* Write again */

692             do {

693                   if ((retcode = writev(unix_socket, vec, nvec)) > 0) {

694                         *writecnt = retcode;

695                         return APR_SUCCESS;  

696                    }

697              } while (retcode == -1 && APR_STATUS_IS_EINTR(errno));

698              rv = errno;

699          }

700    } // if (APR_STATUS_IS_EAGAIN

+    if (APR_STATUS_IS_EAGAIN(rv)) {
+        /* socket is writable, but we can't write the entire buffer; try to write a
+         * smaller amount, and if even that fails then sleep
+         */
+        size_t to_write = vec[0].iov_len;
+        int slept = 0;
+        const apr_interval_time_t sleep_time = APR_USEC_PER_SEC / 4;
+        const int max_sleeps = 8;
+
+        do {
+            if ((retcode = write(unix_socket, vec[0].iov_base, to_write)) > 0) {
+                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ipc_handle->request,
+                              "wrote %d byte(s) and slept %d time(s) after EAGAIN",
+                              retcode, slept);
+                *writecnt = retcode;
+                return APR_SUCCESS;
+            }
+            if (APR_STATUS_IS_EAGAIN(errno)) {
+                if (to_write == 1) {
+                    apr_sleep(sleep_time);
+                    ++slept;
+                }
+                else {
+                    to_write /= 2;
+                }
+            }
+        } while ((APR_STATUS_IS_EINTR(errno) || APR_STATUS_IS_EAGAIN(errno))
+                 && slept < max_sleeps);
+        rv = errno;
+    }
+

     /* We reach here because the above writev returned <= 0 value -- error value.  */

     ap_log_rerror(APLOG_MARK, APLOG_INFO, rv, ipc_handle->request, "mod_fcgid: error writing data to FastCGI server");

}

Can Errlog automatically anticipate this error msg?

Yes. The error message was printed when the ‘writev’ system call failed (returned negative). It belongs to the pattern: log at failed library/system call.