text / cbits / cbits.c

The default branch has multiple heads

Diff from to

cbits/cbits.c

  * A best-effort decoder. Runs until it hits either end of input or
  * the start of an invalid byte sequence.
  *
- * At exit, updates *destoff with the next offset to write to, and
- * returns the next source offset to read from. Moreover, this function
- * exposes the internal decoder state (state0 and codepoint0), allowing one
- * to restart the decoder after it terminates (say, due to a partial codepoint).
+ * At exit, we update *destoff with the next offset to write to and
+ * *src with the source location just past the last successfully
+ * decoded code point, and return the next source location to read from.
+ *
+ * Moreover, we expose the internal decoder state (state0 and
+ * codepoint0), allowing one to restart the decoder after it
+ * terminates (say, due to a partial codepoint).
  *
  * In particular, there are a few possible outcomes,
  *
  */
 const uint8_t *
 _hs_text_decode_utf8_state(uint16_t *const dest, size_t *destoff,
-                           const uint8_t *const src, const uint8_t *const srcend,
+                           const uint8_t **const src,
+                           const uint8_t *const srcend,
                            uint32_t *codepoint0, uint32_t *state0)
 {
   uint16_t *d = dest + *destoff;
-  const uint8_t *s = src;
+  const uint8_t *s = *src, *last = *src;
   uint32_t state = *state0;
   uint32_t codepoint = *codepoint0;
 
 	*d++ = (uint16_t) ((codepoint >> 16) & 0xff);
 	*d++ = (uint16_t) ((codepoint >> 24) & 0xff);
       }
+      last = s;
     }
 #endif
 
       *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
       *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
     }
+    last = s;
   }
 
   /* Invalid encoding, back up to the errant character */
   *destoff = d - dest;
   *codepoint0 = codepoint;
   *state0 = state;
+  *src = last;
 
   return s;
 }
  */
 const uint8_t *
 _hs_text_decode_utf8(uint16_t *const dest, size_t *destoff,
-                     const uint8_t *const src, const uint8_t *const srcend)
+                     const uint8_t *src, const uint8_t *const srcend)
 {
   uint32_t codepoint;
   uint32_t state = UTF8_ACCEPT;
-  return _hs_text_decode_utf8_state(dest, destoff, src, srcend, &codepoint, &state);
+  return _hs_text_decode_utf8_state(dest, destoff, &src, srcend, &codepoint, &state);
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.