Fix infinite loop in ftell when writing wide char data (BZ #16398)

ftell tries to avoid flushing the buffer when it is in write mode by converting the wide char data and placing it into the binary buffer. If the output buffer is full and there is still data to write, the code falls back to flushing the buffer. This breaks when there is some space left in the buffer but not enough to convert the next character in the wide data buffer: __codecvt_do_out then returns a __codecvt_partial status without consuming any more input, the buffer never becomes full, and ftell keeps running in an infinite loop. The fix is to detect the __codecvt_partial status in addition to checking whether the buffer is full. I have also added a test case that demonstrates the infinite loop.
2014-02-05 12:49:00 +05:30 · 2014-02-05 12:49:00 +05:30 · df675f9933
parent 6815994630
commit df675f9933
3 changed files with 115 additions and 5 deletions
--- a/libio/Makefile
+++ b/libio/Makefile
@ -60,7 +60,7 @@ tests = tst_swprintf tst_wprintf tst_swscanf tst_wscanf tst_getwc tst_putwc   \
 	tst-wmemstream1 tst-wmemstream2 \
 	bug-memstream1 bug-wmemstream1 \
 	tst-setvbuf1 tst-popen1 tst-fgetwc bug-wsetpos tst-fseek \
-	tst-fwrite-error
+	tst-fwrite-error tst-ftell-partial-wide
 ifeq (yes,$(build-shared))
 # Add test-fopenloc only if shared library is enabled since it depends on
 # shared localedata objects.
--- a/libio/tst-ftell-partial-wide.c
+++ b/libio/tst-ftell-partial-wide.c
@ -0,0 +1,107 @@
+/* Verify that ftell does not go into an infinite loop when a conversion fails
+   due to insufficient space in the buffer.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <wchar.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <errno.h>
+#include <unistd.h>
+
+static int do_test (void);
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
+
+/* Length, in wide characters including the terminating null, of each input
+   string.  The exact value is arbitrary; it only has to be large enough that
+   the converted multibyte output does not fit in the target buffer during
+   conversion.  */
+#define STRING_SIZE (1400)
+/* Number of input strings written to the stream; ftell is called before each
+   one.  */
+#define NSTRINGS (2)
+
+/* Exercise ftell on a wide-oriented stream opened for writing (BZ #16398).
+   NSTRINGS wide strings are written to a temporary file and ftell is called
+   before each write; the test passes if every call returns successfully
+   instead of looping forever.  Returns 0 on success and 1 if the locale,
+   memory, file setup or any stream operation fails.  */
+static int
+do_test (void)
+{
+  FILE *fp = NULL;
+  wchar_t *inputs[NSTRINGS] = {NULL};
+  int ret = 1;
+
+  /* The wide-to-multibyte conversion under test needs a UTF-8 locale.  */
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    {
+      printf ("Cannot set en_US.UTF-8 locale.\n");
+      goto out;
+    }
+
+  /* Generate input from one character, chosen because it has an odd number of
+     bytes in UTF-8, making it easier to reproduce the problem:
+
+     NAME    Hiragana letter GO
+     CHAR    ご
+     UTF-8   E38194
+     UCS     3054
+     MARC-8  692434  */
+  wchar_t seed = L'ご';
+  for (int i = 0; i < NSTRINGS; i++)
+    {
+      inputs[i] = malloc (STRING_SIZE * sizeof (wchar_t));
+      if (inputs[i] == NULL)
+	{
+	  printf ("Failed to allocate memory for inputs: %m\n");
+	  goto out;
+	}
+      /* Fill with the seed character and null-terminate for fputws.  */
+      wmemset (inputs[i], seed, STRING_SIZE - 1);
+      inputs[i][STRING_SIZE - 1] = L'\0';
+    }
+
+  char *filename;
+  int fd = create_temp_file ("tst-ftell-partial-wide.out", &filename);
+
+  if (fd == -1)
+    {
+      printf ("create_temp_file: %m\n");
+      goto out;
+    }
+
+  fp = fdopen (fd, "w+");
+  if (fp == NULL)
+    {
+      printf ("fdopen: %m\n");
+      /* No stream took ownership of the descriptor, so close it here.  */
+      close (fd);
+      goto out;
+    }
+
+  for (int i = 0; i < NSTRINGS; i++)
+    {
+      /* ftell is the call under test: it must return, and a reported
+	 failure counts as a test failure as well.  */
+      long int offset = ftell (fp);
+      if (offset == -1)
+	{
+	  printf ("ftell: %m\n");
+	  goto out;
+	}
+      printf ("offset: %ld\n", offset);
+      if (fputws (inputs[i], fp) == -1)
+	{
+	  perror ("fputws");
+	  goto out;
+	}
+    }
+  ret = 0;
+
+out:
+  if (fp != NULL)
+    fclose (fp);
+  /* Entries that were never allocated are still NULL, which free accepts.  */
+  for (int i = 0; i < NSTRINGS; i++)
+    free (inputs[i]);
+
+  return ret;
+}
--- a/libio/wfileops.c
+++ b/libio/wfileops.c
@ -715,7 +715,7 @@ _IO_wfile_seekoff (fp, offset, dir, mode)
 		       - fp->_wide_data->_IO_write_base) / clen;
 	  else
 	    {
-	      enum __codecvt_result status;
+	      enum __codecvt_result status = __codecvt_ok;
 	      delta = (fp->_wide_data->_IO_write_ptr
 		       - fp->_wide_data->_IO_write_base);
 	      const wchar_t *write_base = fp->_wide_data->_IO_write_base;
@ -728,9 +728,12 @@ _IO_wfile_seekoff (fp, offset, dir, mode)
 		 flush buffers for every ftell.  */
 	      do
 		{
-		  /* Ugh, no point trying to avoid the flush.  Just do it
-		     and go back to how it was with the read mode.  */
-		  if (delta > 0 && new_write_ptr == fp->_IO_buf_end)
+		  /* There is not enough space in the buffer to do the entire
+		     conversion, so there is no point trying to avoid the
+		     buffer flush.  Just do it and go back to how it was with
+		     the read mode.  */
+		  if (status == __codecvt_partial
+		      || (delta > 0 && new_write_ptr == fp->_IO_buf_end))
 		    {
 		      if (_IO_switch_to_wget_mode (fp))
 			return WEOF;