svn-3764

Version:

1.7.0

Bug Link:

http://subversion.tigris.org/issues/show_bug.cgi?id=3764

Patch Link:

source code patch: http://svn.apache.org/viewvc?view=revision&revision=

reproduction: http://subversion.tigris.org/nonav/issues/showattachment.cgi/1134/

Symptom:

When ‘svn patch’ is executed with the patch of many hunks (a chunk of patch), then “svn: Can’t open file ‘test.diff’: Too many open files” is printed.

Run: ‘svn patch patch.diff’

 

*Result: Error happens. The message is following:

 

subversion/svn/patch-cmd.c:92: (apr_err=24)

… ...

svn: Can't open file '/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client/patch.diff': Too many open files

subversion/libsvn_wc/lock.c:1637: (apr_err=200030)

… ...

svn: unable to open database file

subversion/libsvn_subr/sqlite.c:537: (apr_err=200030)

svn: unable to open database file

How it is diagnosed:

reproduced

 

File.txt content:

X

X

 

Patch.diff content:

Index: file.txt

===================================================================

--- file.txt        (revision 2)

+++ file.txt        (working copy)

@@ -3,6 +3,12 @@

 X

 X

 X

+#include <stdio.h>

+int main(void) {

+           int i;

+           for ( i =0; i < n; i ++ )

+                   sum += i;

+}

 X

 X

 X

@@ -11,6 +17,12 @@

 X

 X

 X

+#include <stdio.h>

+int main(void) {

+           int i;

+           for ( i =0; i < n; i ++ )

+                   sum += i;

+}

 X

 X

 X

 

 

ð  #hunks are 500

 

Run: ‘svn patch patch.diff’

 

*Result: Error happens. The message is following:

 

subversion/svn/patch-cmd.c:92: (apr_err=24)

subversion/libsvn_wc/lock.c:1637: (apr_err=24)

subversion/libsvn_client/patch.c:2667: (apr_err=24)

subversion/libsvn_client/patch.c:2667: (apr_err=24)

subversion/libsvn_diff/parse-diff.c:1291: (apr_err=24)

subversion/libsvn_diff/parse-diff.c:744: (apr_err=24)

subversion/libsvn_subr/io.c:2793: (apr_err=24)

svn: Can't open file '/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client/patch.diff': Too many open files

subversion/libsvn_wc/lock.c:1637: (apr_err=200030)

subversion/libsvn_wc/wc_db.c:8327: (apr_err=200030)

subversion/libsvn_subr/sqlite.c:192: (apr_err=200030)

subversion/libsvn_subr/sqlite.c:176: (apr_err=200030)

subversion/libsvn_subr/sqlite.c:212: (apr_err=200030)

svn: unable to open database file

subversion/libsvn_subr/sqlite.c:537: (apr_err=200030)

svn: unable to open database file

Root Cause:

Brief:

‘svn patch’ opens the patch file too many times.

 

‘svn patch’ opens the patch file three times for each hunk while the limit of file descriptor is 1024. If we have more than 341 hunks, the error happens.

 

The patch is to make only one single file descriptor per patch file. Instead of making file handle for each hunk, just create one handle and perform ‘seek’ within the patch file to execute all the hunks.

Detail:

Error message location:

 

-        Call stack:

 

#0  svn_io_file_open (fname="/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client/patch.diff", flag=129, perm=4095, ...)

              at subversion/libsvn_subr/io.c:2792

#1  parse_next_hunk (...)

              at subversion/libsvn_diff/parse-diff.c:729

#2  svn_diff_parse_next_patch (...)

              at subversion/libsvn_diff/parse-diff.c:1288

#3  apply_patches (...)

              at subversion/libsvn_client/patch.c:2665

#4  svn_wc__call_with_write_lock (local_abspath="/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client", ...)

              at subversion/libsvn_wc/lock.c:1655

#5  svn_client_patch (patch_abspath= "/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client/patch.diff",

                          local_abspath= "/home/yyzhou/mmlee/LOG-project/svn/3764/prev/client", ...)

              at subversion/libsvn_client/patch.c:2761

#6  svn_cl__patch (...)

              at subversion/svn/patch-cmd.c:88

#7  main (argc, argv) at subversion/svn/main.c:2373

 

 

/* ‘svn_io_file_open’ calls ‘file_open’ to open the file and consequently calls ‘apr_file_open’ in APR library. ‘apr_file_open’ calls ‘open’ system call */

 

svn_error_t *

svn_io_file_open(apr_file_t **new_file, const char *fname,

                     apr_int32_t flag, apr_fileperms_t perm,

                     apr_pool_t *pool)

{

  const char *fname_apr;

  apr_status_t status;

 

  SVN_ERR(cstring_from_utf8(&fname_apr, fname, pool));

  status = file_open(new_file, fname_apr, flag | APR_BINARY, perm, TRUE,

                         pool);

 

  if (status)

        return svn_error_wrap_apr(status, _("Can't open file '%s'"),

                              svn_dirent_local_style(fname, pool));

  else

        return SVN_NO_ERROR;

}

 

/* ‘file_open’ returns the error code of APR library. To get the appropriate error message, it calls ‘svn_error_wrap_apr’. with this function, we get the message “Too many open files”. This message and ‘Can’t open file’ are concatenated . */

static apr_status_t

file_open(…)

{

  apr_status_t status = apr_file_open(f, fname_apr, flag, perm, pool);

  ...

  return status;

}

 

APR_DECLARE(apr_status_t) apr_file_open(…)

{

...

        else {

            fd = open(fname, oflags, apr_unix_perms2mode(perm));

        }

        if (fd < 0) {

           return errno;

        } /* returns error */

...

}

 

/* in this function, for each hunk, three ‘svn_io_file_open’ is called so that it runs out of file descriptors */

 

static svn_error_t *

parse_next_hunk(… … )

{

 

...

 

  if (hunk_seen && start < end)

        {

         …

          /* Create a stream which returns the hunk text itself. */

          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

                               result_pool));

         …

          /* Create a stream which returns the original hunk text. */

          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

                               result_pool));

         …

          /* Create a stream which returns the modified hunk text. */

          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

                               result_pool));

         …

 

....

 

  return SVN_NO_ERROR;

}

 

/* ‘svn_diff_parse_next_patch’ function calls ‘parse_next_hunk’ repeatedly to handle all the hunks in the patch file */

svn_error_t *

svn_diff_parse_next_patch(…)

{

  ...

 

          do

            {

              …

              SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name,

                                  &prop_operation, *patch, stream,

                                  ignore_whitespace,

                                      result_pool, iterpool));

               …

            }

          while (hunk);

  ...

}

 

/* ‘func’ is a function pointer of ‘apply_patches’. This function returns the error message ‘svn: Can’t open file `test.diff`: Too many open files’, and it is stored to ‘err1’ variable. Next, we call ‘svn_wc__release_write_lock’ and ‘err2’ has the error message of ‘svn: unable to open database file’. In ‘svn_error_compose_create’, the error message of both err1 and err2 are concatenated.  */

svn_error_t *

svn_wc__call_with_write_lock(…)

{

  …

  err1 = svn_error_return(func(baton, result_pool, scratch_pool));

  err2 = svn_wc__release_write_lock(wc_ctx, lock_root_abspath, scratch_pool);

  return svn_error_compose_create(err1, err2);

}

 

The patch:

To manage only one file descriptor for the patch file, they don’t use ‘stream’, but ‘svn_io_*’ api with the information of current ranges.

 

--- subversion/trunk/subversion/libsvn_diff/parse-diff.c        2011/01/08 02:22:22              1056600

+++ subversion/trunk/subversion/libsvn_diff/parse-diff.c 2011/01/08 02:26:03              1056601

@@ -61,28 +74,28 @@ struct svn_diff_hunk_t {

   svn_linenum_t trailing_context;

 };

 

 

/* they need to store the current location to seek the appropriate patch hunk*/

-svn_error_t *

-svn_diff_hunk_reset_diff_text(const svn_diff_hunk_t *hunk)

+void

+svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)

 {

-  return svn_error_return(svn_stream_reset(hunk->diff_text));

+  hunk->diff_text_range.current = hunk->diff_text_range.start;

 }

 

-svn_error_t *

-svn_diff_hunk_reset_original_text(const svn_diff_hunk_t *hunk)

+void

+svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)

 {

   if (hunk->patch->reverse)

-        return svn_error_return(svn_stream_reset(hunk->modified_text));

+    hunk->modified_text_range.current = hunk->modified_text_range.start;

   else

-        return svn_error_return(svn_stream_reset(hunk->original_text));

+    hunk->original_text_range.current = hunk->original_text_range.start;

 }

 

-svn_error_t *

-svn_diff_hunk_reset_modified_text(const svn_diff_hunk_t *hunk)

+void

+svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)

 {

   if (hunk->patch->reverse)

-    return svn_error_return(svn_stream_reset(hunk->original_text));

+    hunk->original_text_range.current = hunk->original_text_range.start;

   else

-        return svn_error_return(svn_stream_reset(hunk->modified_text));

+    hunk->modified_text_range.current = hunk->modified_text_range.start;

 }

 

/* when they parse the hunk header, they are using ‘svn_io_file_*’ API instead of ‘svn_stream_*’ API */

@@ -253,32 +266,49 @@ parse_hunk_header(const char *header, sv

   return TRUE;

 }

 

-/* Set *EOL to the first end-of-line string found in the stream

- * accessed through READ_FN, MARK_FN and SEEK_FN, whose stream baton

- * is BATON.  Leave the stream read position unchanged.

+/* Set *EOL to the first end-of-line string found in FILE.

+ * Start scanning at the current file cursor offset and scan up

+ * to MAX_LEN bytes. Leave the current file cursor position unchanged.

  * Allocate *EOL statically; POOL is a scratch pool. */

 static svn_error_t *

-scan_eol(const char **eol, svn_stream_t *stream, apr_pool_t *pool)

+scan_eol(const char **eol, apr_file_t *file, apr_size_t max_len,

+             apr_pool_t *pool)

 {

   const char *eol_str;

-  svn_stream_mark_t *mark;

+  apr_off_t pos;

+  apr_size_t total_len;

 

-  SVN_ERR(svn_stream_mark(stream, &mark, pool));

+  pos = 0;

+  SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, pool));

 

   eol_str = NULL;

+  total_len = 0;

   while (! eol_str)

         {

-          char buf[512];

+          char buf[256];

           apr_size_t len;

+          svn_error_t *err;

+

+          if (total_len >= max_len)

+            break;

+

+          len = sizeof(buf) - 1 < (max_len - total_len) ? sizeof(buf) - 1

+                                                    : (max_len - total_len);

+          err = svn_io_file_read_full(file, buf, sizeof(buf) - 1, &len, pool);

+          if (err && APR_STATUS_IS_EOF(err->apr_err))

+        svn_error_clear(err);

+          else

+            SVN_ERR(err);

 

-          len = sizeof(buf);

-      SVN_ERR(svn_stream_read(stream, buf, &len));

           if (len == 0)

             break; /* EOF */

+

+          buf[len] = '\0';

+          total_len += len;

           eol_str = svn_eol__detect_eol(buf, buf + len);

         }

 

-  SVN_ERR(svn_stream_seek(stream, mark));

+  SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, pool));

 

   *eol = eol_str;

 

/* previously, when you parse the hunk, you are using stream and open the patch file three times for one hunk, so it is easy to use up all the file descriptor, but the patched code allows only one file descriptor of the patch file, so that we could solve this problem.  */

@@ -717,41 +801,23 @@ parse_next_hunk(svn_diff_hunk_t **hunk,

         /* Rewind to the start of the line just read, so subsequent calls

          * to this function or svn_diff_parse_next_patch() don't end

          * up skipping the line -- it may contain a patch or hunk header. */

-    SVN_ERR(svn_io_file_seek(patch->patch_file, APR_SET, &last_line,

-                                 scratch_pool));

+        SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));

 

   if (hunk_seen && start < end)

         {

-          apr_file_t *f;

-          apr_int32_t flags = APR_READ | APR_BUFFERED;

-

/* do not need to open the patch file three times for each hunk */

-          /* Create a stream which returns the hunk text itself. */

-          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

-                                   result_pool));

-          diff_text = svn_stream_from_aprfile_range_readonly(f, FALSE,

-                                                         start, end,

-                                                         result_pool);

-

-          /* Create a stream which returns the original hunk text. */

-          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

-                                   result_pool));

-          original_text = svn_stream_from_aprfile_range_readonly(f, FALSE,

-                                                             start, end,

-                                                                 result_pool);

-

-          /* Create a stream which returns the modified hunk text. */

-          SVN_ERR(svn_io_file_open(&f, patch->path, flags, APR_OS_DEFAULT,

-                                   result_pool));

-          modified_text = svn_stream_from_aprfile_range_readonly(f, FALSE,

-                                                             start, end,

-                                                             result_pool);

-

-          (*hunk)->diff_text = diff_text;

           (*hunk)->patch = patch;

-          (*hunk)->original_text = original_text;

-          (*hunk)->modified_text = modified_text;

+          (*hunk)->apr_file = apr_file;

           (*hunk)->leading_context = leading_context;

           (*hunk)->trailing_context = trailing_context;

 

/* we just need to have the range of each hunk, so that we can access the appropriate content using ‘seek’ function */

+          (*hunk)->diff_text_range.start = start;

+          (*hunk)->diff_text_range.current = start;

+          (*hunk)->diff_text_range.end = end;

+          (*hunk)->original_text_range.start = start;

+          (*hunk)->original_text_range.current = start;

+          (*hunk)->original_text_range.end = end;

+          (*hunk)->modified_text_range.start = start;

+          (*hunk)->modified_text_range.current = start;

+          (*hunk)->modified_text_range.end = end;

         }

   else

         /* Something went wrong, just discard the result. */

Failure symptom category

Resource exhaustion

Is there any log message?

Yes

 

How can we automatically insert the log message?

Check system call (open) if we have the apr_file_open source code. But if we do not, then we can use frequent logging pattern. Or we can simply annotate apr_file_open.