Commits

Bryan O'Sullivan committed bee604d

Improve on previous fix

This version tries to force the real decoding function to be inlined
into each of its callers, which in turn each have different criteria
for backing up a byte. This avoids an extra test at the end of
strict decoding.

While this seems to fix gh-61, I want to beef up the test suite so
that it will correctly detect the bug.

Comments (0)

Files changed (1)

  *      state0 != UTF8_ACCEPT, UTF8_REJECT
  *
  */
-uint8_t const *
-_hs_text_decode_utf8_state(uint16_t *dest, size_t *destoff,
-                           const uint8_t const *src, const uint8_t const *srcend,
-                           uint32_t *codepoint0, uint32_t *state0)
+
+#if defined(__GNUC__) || defined(__clang__)
+static inline uint8_t const *
+_hs_text_decode_utf8_int(uint16_t *dest, size_t *destoff,
+			 const uint8_t const *src, const uint8_t const *srcend,
+			 uint32_t *codepoint0, uint32_t *state0)
+  __attribute((always_inline));
+#endif
+static inline uint8_t const *
+_hs_text_decode_utf8_int(uint16_t *dest, size_t *destoff,
+			 const uint8_t const *src, const uint8_t const *srcend,
+			 uint32_t *codepoint0, uint32_t *state0)
 {
   uint16_t *d = dest + *destoff;
   const uint8_t const *s = src;
     }
   }
 
-  /* Invalid encoding, back up to the errant character */
-  if (state == UTF8_REJECT)
-    s -= 1;
-
   *destoff = d - dest;
   *codepoint0 = codepoint;
   *state0 = state;
   return s;
 }
 
+uint8_t const *
+_hs_text_decode_utf8_state(uint16_t *dest, size_t *destoff,
+                           const uint8_t const *src,
+			   const uint8_t const *srcend,
+                           uint32_t *codepoint0, uint32_t *state0)
+{  /* Wrapper: delegate to _hs_text_decode_utf8_int, then adjust its result. */
+  uint8_t const *ret = _hs_text_decode_utf8_int(dest, destoff, src, srcend,
+						codepoint0, state0);
+  if (*state0 == UTF8_REJECT)  /* invalid encoding: back up to the errant character */
+    ret -=1;
+  return ret;  /* pointer from the inner decoder, moved back one byte on reject */
+}
+
 /*
  * Helper to decode buffer and discard final decoder state
  */
 {
   uint32_t codepoint;
   uint32_t state = UTF8_ACCEPT;
-  uint8_t const *ret = _hs_text_decode_utf8_state(dest, destoff, src, srcend,
-						  &codepoint, &state);
-  /* Back up if we have an incomplete encoding */
-  if (state != UTF8_ACCEPT && state != UTF8_REJECT)
+  uint8_t const *ret = _hs_text_decode_utf8_int(dest, destoff, src, srcend,
+						&codepoint, &state);
+  /* Back up if we have an incomplete or invalid encoding */
+  if (state != UTF8_ACCEPT)
     ret -= 1;
   return ret;
 }