split -C N

Version:

This bug is present at least as far back as textutils-1.22 (Jan, 1997)  (fixed in 6.9)

Bug Link:

http://lists.gnu.org/archive/html/bug-coreutils/2007-04/msg00079.html

Symptom:

split -C sometimes creates empty files

Failure type:

unnecessary computation

Are any log messages printed (at the default verbosity level)?

No

How to reproduce?

$echo x | ./split -C 1

$ls -l x??

... 1 2011-03-07 16:12 xaa

... 1 2011-03-07 16:12 xab

... 0 2011-03-07 16:12 xac        <--expected behavior is not to create this empty file

Root cause:

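The loop never checks whether n_buffered is already zero, so it does not break out once all buffered data has been written; instead it continues and writes an empty output file.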

src/split.c

line_bytes_split (size_t n_bytes)

{

  size_t n_read;

  char *bp;

  bool eof = false;

  size_t n_buffered = 0;

  char *buf = xmalloc (n_bytes);

  do  

    {  

      /* Fill up the full buffer size from the input file.  */

      n_read = full_read (STDIN_FILENO, buf + n_buffered, n_bytes - n_buffered);

      if (n_read == SAFE_READ_ERROR) <-- this check seems to never be triggered!

        error (EXIT_FAILURE, errno, "%s", infile);

      /*

         On the third iteration, n_read is 0 and n_buffered is also 0, since the previous two iterations already wrote out all the buffered contents.

     */                                                                                                 

      n_buffered += n_read;

      if (n_buffered != n_bytes)        //n_bytes is 1

-       eof = true;        // although eof is set, the loop body still continues and creates an empty file!
+       {
+         if (n_buffered == 0)
+           break;
+         eof = true;
+       }

      ...

      /* Output the chars as one output file.  */

      cwrite (true, buf, bp - buf);        //empty file created!

      ...

      n_buffered -= bp - buf;

      if (n_buffered > 0)

        memmove (buf, bp, n_buffered);

    }  

   while (!eof);

  free (buf);

}

size_t

full_read (int fd, const void *buf, size_t count)                                                                                                              

{

  size_t total = 0;

  const char *ptr = (const char *) buf;

  while (count > 0)

    {  

      size_t n_rw = safe_read (fd, ptr, count);

      if (n_rw == (size_t) -1)

        break;

      if (n_rw == 0)

            {  

              errno = ZERO_BYTE_TRANSFER_ERRNO;

              break;

            }

      ...

    }

  return total;

}

safe_read (int fd, void const *buf, size_t count)

{

  enum { BUGGY_READ_MAXIMUM = INT_MAX & ~8191 };

  for (;;)

    {  

      ssize_t result = read (fd, buf, count); // read system call failed

      if (0 <= result)

        return result;

      else if (IS_EINTR (errno))

        continue;

      else if (errno == EINVAL && BUGGY_READ_MAXIMUM < count)

        count = BUGGY_READ_MAXIMUM;

      else

        return result;

    }  

}

Can Errlog insert an error message?

Yes — based on the return value of the read system call, or on the frequent logging pattern for safe_read.