Anonymous avatar Anonymous committed f645a47

http://code.google.com/p/mochiweb/issues/detail?id=32 utf-8 output option for mochijson2:encoder

Comments (0)

Files changed (1)

src/mochijson2.erl

 %% @type json_term() = json_string() | json_number() | json_array() |
 %%                     json_object()
 
--record(encoder, {handler=null}).
+-record(encoder, {handler=null,
+                  utf8=false}).
 
 -record(decoder, {object_hook=null,
                   offset=0,
 
 %% @spec encoder([encoder_option()]) -> function()
 %% @doc Create an encoder/1 with the given options.
+%% @type encoder_option() = handler_option() | utf8_option()
+%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
 encoder(Options) ->
     State = parse_encoder_options(Options, #encoder{}),
     fun (O) -> json_encode(O, State) end.
 parse_encoder_options([], State) ->
     State;
 parse_encoder_options([{handler, Handler} | Rest], State) ->
-    parse_encoder_options(Rest, State#encoder{handler=Handler}).
+    parse_encoder_options(Rest, State#encoder{handler=Handler});
+parse_encoder_options([{utf8, Switch} | Rest], State) ->
+    parse_encoder_options(Rest, State#encoder{utf8=Switch}).
 
 parse_decoder_options([], State) ->
     State;
     [$, | Acc1] = lists:foldl(F, "{", Props),
     lists:reverse([$\} | Acc1]).
 
-json_encode_string(A, _State) when is_atom(A) ->
+json_encode_string(A, State) when is_atom(A) ->
     L = atom_to_list(A),
     case json_string_is_safe(L) of
         true ->
             [?Q, L, ?Q];
         false ->
-            json_encode_string_unicode(xmerl_ucs:from_utf8(L), [?Q])
+            json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
     end;
-json_encode_string(B, _State) when is_binary(B) ->
+json_encode_string(B, State) when is_binary(B) ->
     case json_bin_is_safe(B) of
         true ->
             [?Q, B, ?Q];
         false ->
-            json_encode_string_unicode(xmerl_ucs:from_utf8(B), [?Q])
+            json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
     end;
 json_encode_string(I, _State) when is_integer(I) ->
     [?Q, integer_to_list(I), ?Q];
-json_encode_string(L, _State) when is_list(L) ->
+json_encode_string(L, State) when is_list(L) ->
     case json_string_is_safe(L) of
         true ->
             [?Q, L, ?Q];
         false ->
-            json_encode_string_unicode(L, [?Q])
+            json_encode_string_unicode(L, State, [?Q])
     end.
 
 json_string_is_safe([]) ->
             false
     end.
 
-json_encode_string_unicode([], Acc) ->
+json_encode_string_unicode([], _State, Acc) ->
     lists:reverse([$\" | Acc]);
-json_encode_string_unicode([C | Cs], Acc) ->
+json_encode_string_unicode([C | Cs], State, Acc) ->
     Acc1 = case C of
                ?Q ->
                    [?Q, $\\ | Acc];
                    [$r, $\\ | Acc];
                $\t ->
                    [$t, $\\ | Acc];
-               C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
+               C when C >= 0, C < $\s ->
+                   [unihex(C) | Acc];
+               C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
+                   [xmerl_ucs:to_utf8(C) | Acc];
+               C when  C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
                    [unihex(C) | Acc];
                C when C < 16#7f ->
                    [C | Acc];
                _ ->
                    exit({json_encode, {bad_char, C}})
            end,
-    json_encode_string_unicode(Cs, Acc1).
+    json_encode_string_unicode(Cs, State, Acc1).
 
 hexdigit(C) when C >= 0, C =< 9 ->
     C + $0;
 
 test_all() ->
     [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
+    test_encoder_utf8(),
     test_one(e2j_test_vec(utf8), 1).
 
 test_one([], _N) ->
      {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
       "[-123,\"foo\",{\"bar\":[]},null]"}
     ].
+
+%% test utf8 encoding
+test_encoder_utf8() ->
+    %% safe conversion case (default)
+    [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = 
+        encode(<<1,"\321\202\320\265\321\201\321\202">>),
+
+    %% raw utf8 output (optional)
+    Enc = mochijson2:encoder([{utf8, true}]),
+    [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
+        Enc(<<1,"\321\202\320\265\321\201\321\202">>).
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.