Anonymous avatar Anonymous committed cf7167f

Reject attempts to upload invalid UTF-8 JSON. Closes COUCHDB-345.

This patch requires JSON to be encoded using UTF-8. In the future we will
accept other encodings. Thanks to Joan Touzet and James Dumay for the bug
reports, and to Curt Arnold for patches and discussion.

Comments (0)

Files changed (4)

  * Sebastian Cohnen <sebastian.cohnen@gmx.net>
  * Sven Helmberger <sven.helmberger@gmx.de>
  * Dan Walters <dan@danwalters.net>
+ * Curt Arnold <carnold@apache.org>
 
 For a list of authors see the `AUTHORS` file.

share/www/script/test/view_errors.js

           map : "function(doc){emit(doc.integer)}"
         })
       });
-      T(JSON.parse(xhr.responseText).error == "invalid_json");
+      T(JSON.parse(xhr.responseText).error == "bad_request");
 
       // views should ignore Content-Type, like the rest of CouchDB
       var xhr = CouchDB.request("POST", "/test_suite_db/_temp_view", {

src/couchdb/couch_httpd.erl

     catch
         throw:{http_head_abort, Resp0} ->
             {ok, Resp0};
+        throw:{invalid_json, S} ->
+            ?LOG_ERROR("attempted upload of invalid JSON ~s", [S]),
+            send_error(HttpReq, {bad_request, "invalid UTF-8 JSON"});
         exit:normal ->
             exit(normal);
         throw:Error ->

src/mochiweb/mochijson2.erl

     case B of
         <<_:O/binary, ?Q, _/binary>> ->
             O;
-        <<_:O/binary, C, _/binary>> when C =/= $\\ ->
+        <<_:O/binary, $\\, _/binary>> ->
+            {escape, O};
+        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
             tokenize_string_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            tokenize_string_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            tokenize_string_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            tokenize_string_fast(B, 4 + O);
         _ ->
-            {escape, O}
+            throw(invalid_utf8)
     end.
 
 tokenize_string(B, S=#decoder{offset=O}, Acc) ->
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.