Commits

Anonymous committed 8020f7a

merge files from mochiweb-tip

  • Participants
  • Parent commits 6cc9439

Comments (0)

Files changed (22)

ebin/mochiweb.app

  [{description, "MochiMedia Web Server"},
   {vsn, "0.02"},
   {modules, [
+        mochifmt,
         mochiglobal,            
         mochihex,
         mochijson,
         mochijson2,
+        mochilists,
+        mochilogfile2,
         mochinum,
+        mochitemp,
+        mochiutf8,
         mochiweb,
+        mochiweb_acceptor,
         mochiweb_app,
         mochiweb_charref,
         mochiweb_cookies,
         mochiweb_cover,
         mochiweb_echo,
         mochiweb_headers,
+        mochiweb_io,
         mochiweb_html,
         mochiweb_http,
     	mochiweb_mime,
         mochiweb_request,
         mochiweb_response,
         mochiweb_skel,
+        mochiweb_socket,
         mochiweb_socket_server,
         mochiweb_sup,
         mochiweb_util,
         reloader,
-        mochifmt,
         mochifmt_std,
         mochifmt_records
 	    ]},
+
+-define(RECBUF_SIZE, 8192).
+

src/mochilists.erl

+%% @copyright Copyright (c) 2010 Mochi Media, Inc.
+%% @author David Reid <dreid@mochimedia.com>
+
+%% @doc Utility functions for dealing with proplists.
+
+-module(mochilists).
+-author("David Reid <dreid@mochimedia.com>").
+-export([get_value/2, get_value/3, is_defined/2, set_default/2, set_defaults/2]).
+
+%% @spec set_default({Key::term(), Value::term()}, Proplist::list()) -> list()
+%%
+%% @doc Prepend {Key, Value} to Proplist unless is_defined(Key, Proplist)
+%%      already holds, in which case Proplist is returned unchanged.
+set_default({Key, _Value} = Default, Proplist) ->
+    case is_defined(Key, Proplist) of
+        false ->
+            [Default | Proplist];
+        true ->
+            Proplist
+    end.
+
+%% @spec set_defaults([{Key::term(), Value::term()}], Proplist::list()) -> list()
+%%
+%% @doc Apply set_default/2 to Proplist once for every {Key, Value} pair in
+%%      DefaultProps, working through DefaultProps from left to right.
+set_defaults(DefaultProps, Proplist) ->
+    lists:foldl(fun (Default, Acc) -> set_default(Default, Acc) end,
+                Proplist,
+                DefaultProps).
+
+
+%% @spec is_defined(Key::term(), Proplist::list()) -> bool()
+%%
+%% @doc Returns true if Proplist contains at least one tuple whose first
+%%      element is Key, otherwise false is returned.
+is_defined(Key, Proplist) ->
+    case lists:keyfind(Key, 1, Proplist) of
+        false ->
+            false;
+        _Entry ->
+            true
+    end.
+
+
+%% @spec get_value(Key::term(), Proplist::list()) -> term() | undefined
+%%
+%% @doc Look up <code>Key</code> in Proplist, returning its value or the
+%%      atom undefined when there is no entry for it.
+get_value(Key, Proplist) ->
+    get_value(Key, Proplist, undefined).
+
+%% @spec get_value(Key::term(), Proplist::list(), Default::term()) -> term()
+%%
+%% @doc Look up <code>Key</code> in Proplist, returning the value of the
+%%      first {Key, Value} entry or <code>Default</code> when none exists.
+get_value(Key, Proplist, Default) ->
+    %% keyfind on an empty list already yields false, so no special case
+    %% for [] is needed.
+    case lists:keyfind(Key, 1, Proplist) of
+        {Key, Value} ->
+            Value;
+        false ->
+            Default
+    end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+set_defaults_test() ->
+    ?assertEqual(
+       [{k, v}],
+       set_defaults([{k, v}], [])),
+    ?assertEqual(
+       [{k, v}],
+       set_defaults([{k, vee}], [{k, v}])),
+    ?assertEqual(
+       lists:sort([{kay, vee}, {k, v}]),
+       lists:sort(set_defaults([{k, vee}, {kay, vee}], [{k, v}]))),
+    ok.
+
+set_default_test() ->
+    ?assertEqual(
+       [{k, v}],
+       set_default({k, v}, [])),
+    ?assertEqual(
+       [{k, v}],
+       set_default({k, vee}, [{k, v}])),
+    ok.
+
+get_value_test() ->
+    ?assertEqual(
+       undefined,
+       get_value(foo, [])),
+    ?assertEqual(
+       undefined,
+       get_value(foo, [{bar, baz}])),
+    ?assertEqual(
+       bar,
+       get_value(foo, [{foo, bar}])),
+    ?assertEqual(
+       default,
+       get_value(foo, [], default)),
+    ?assertEqual(
+       default,
+       get_value(foo, [{bar, baz}], default)),
+    ?assertEqual(
+       bar,
+       get_value(foo, [{foo, bar}], default)),
+    ok.
+
+-endif.
+

src/mochilogfile2.erl

+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc Write newline delimited log files, ensuring that if a truncated
+%%      entry is found on log open then it is fixed before writing. Uses
+%%      delayed writes and raw files for performance.
+-module(mochilogfile2).
+-author('bob@mochimedia.com').
+
+-export([open/1, write/2, close/1, name/1]).
+
+%% @spec open(Name) -> Handle
+%% @doc Open the log file Name for reading and writing (raw, binary, with
+%%      delayed writes), creating it if needed. Any trailing bytes after
+%%      the last newline are truncated before the handle is returned, so
+%%      that the file only contains complete records.
+open(Name) ->
+    Modes = [raw, read, write, delayed_write, binary],
+    {ok, FD} = file:open(Name, Modes),
+    ok = fix_log(FD),
+    {?MODULE, Name, FD}.
+
+%% @spec name(Handle) -> string()
+%% @doc Return the path of the log file.
+name({?MODULE, Name, _FD}) ->
+    Name.
+
+%% @spec write(Handle, IoData) -> ok
+%% @doc Append IoData followed by a newline to the log file referenced by
+%%      Handle. Crashes with badmatch if the write does not return ok.
+write({?MODULE, _Name, FD}, IoData) ->
+    Line = [IoData, $\n],
+    ok = file:write(FD, Line).
+
+%% @spec close(Handle) -> ok
+%% @doc Sync and then close the log file referenced by Handle. Crashes
+%%      with badmatch if either step does not return ok.
+close({?MODULE, _Name, FD}) ->
+    ok = file:sync(FD),
+    ok = file:close(FD).
+
+%% Truncate the file behind FD just after its last newline (or to zero
+%% length if find_last_newline/2 reports offset 0), leaving the file
+%% position at the new end of file. Always returns ok.
+fix_log(FD) ->
+    {ok, Eof} = file:position(FD, eof),
+    Offset = find_last_newline(FD, Eof),
+    {ok, Offset} = file:position(FD, Offset),
+    ok = file:truncate(FD).
+
+%% Seek backwards from Location (a byte offset, normally the end of file)
+%% to the end of the last complete log entry. Returns the offset just past
+%% the last newline at or before Location, or 0 when there is none.
+%%
+%% The scan goes all the way down to byte 0, so a file that consists of a
+%% single newline (one empty record) is recognised as complete rather than
+%% being truncated to nothing.
+find_last_newline(_FD, N) when N =< 0 ->
+    0;
+find_last_newline(FD, Location) ->
+    %% Inspect the byte immediately before Location.
+    case file:pread(FD, Location - 1, 1) of
+        {ok, <<$\n>>} ->
+            Location;
+        {ok, _} ->
+            find_last_newline(FD, Location - 1)
+    end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+name_test() ->
+    D = mochitemp:mkdtemp(),
+    FileName = filename:join(D, "open_close_test.log"),
+    H = open(FileName),
+    ?assertEqual(
+       FileName,
+       name(H)),
+    close(H),
+    file:delete(FileName),
+    file:del_dir(D),
+    ok.
+
+open_close_test() ->
+    D = mochitemp:mkdtemp(),
+    FileName = filename:join(D, "open_close_test.log"),
+    OpenClose = fun () ->
+                        H = open(FileName),
+                        ?assertEqual(
+                           true,
+                           filelib:is_file(FileName)),
+                        ok = close(H),
+                        ?assertEqual(
+                           {ok, <<>>},
+                           file:read_file(FileName)),
+                        ok
+                end,
+    OpenClose(),
+    OpenClose(),
+    file:delete(FileName),
+    file:del_dir(D),
+    ok.
+
+write_test() ->
+    D = mochitemp:mkdtemp(),
+    FileName = filename:join(D, "write_test.log"),
+    F = fun () ->
+                H = open(FileName),
+                write(H, "test line"),
+                close(H),
+                ok
+        end,
+    F(),
+    ?assertEqual(
+       {ok, <<"test line\n">>},
+       file:read_file(FileName)),
+    F(),
+    ?assertEqual(
+       {ok, <<"test line\ntest line\n">>},
+       file:read_file(FileName)),
+    file:delete(FileName),
+    file:del_dir(D),
+    ok.
+
+%% Verify that opening a log whose last entry is incomplete (no trailing
+%% newline) discards that partial entry before new data is appended.
+fix_log_test() ->
+    D = mochitemp:mkdtemp(),
+    FileName = filename:join(D, "write_test.log"),
+    file:write_file(FileName, <<"first line good\nsecond line bad">>),
+    F = fun () ->
+                H = open(FileName),
+                write(H, "test line"),
+                close(H),
+                ok
+        end,
+    F(),
+    ?assertEqual(
+       {ok, <<"first line good\ntest line\n">>},
+       file:read_file(FileName)),
+    file:write_file(FileName, <<"first line bad">>),
+    F(),
+    ?assertEqual(
+       {ok, <<"test line\n">>},
+       file:read_file(FileName)),
+    F(),
+    ?assertEqual(
+       {ok, <<"test line\ntest line\n">>},
+       file:read_file(FileName)),
+    %% Clean up like the other tests in this module so no temp dir is leaked.
+    file:delete(FileName),
+    file:del_dir(D),
+    ok.
+
+-endif.
     1000 = int_pow(10, 3),
     ok.
 
-%% XXX arg 02/12/10
-%% disabled pending resolution of http://code.google.com/p/mochiweb/issues/detail?id=63
-%digits_test_disabled() ->
-%    ?assertEqual("0",
-%                 digits(0)),
-%    ?assertEqual("0.0",
-%                 digits(0.0)),
-%    ?assertEqual("1.0",
-%                 digits(1.0)),
-%    ?assertEqual("-1.0",
-%                 digits(-1.0)),
-%    ?assertEqual("0.1",
-%                 digits(0.1)),
-%    ?assertEqual("0.01",
-%                 digits(0.01)),
-%    ?assertEqual("0.001",
-%                 digits(0.001)),
-%    ?assertEqual("1.0e+6",
-%                 digits(1000000.0)),
-%    ?assertEqual("0.5",
-%                 digits(0.5)),
-%    ?assertEqual("4503599627370496.0",
-%                 digits(4503599627370496.0)),
-%    %% small denormalized number
-%    %% 4.94065645841246544177e-324
-%    <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
-%    ?assertEqual("4.9406564584124654e-324",
-%                 digits(SmallDenorm)),
-%    ?assertEqual(SmallDenorm,
-%                 list_to_float(digits(SmallDenorm))),
-%    %% large denormalized number
-%    %% 2.22507385850720088902e-308
-%    <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
-%    ?assertEqual("2.225073858507201e-308",
-%                 digits(BigDenorm)),
-%    ?assertEqual(BigDenorm,
-%                 list_to_float(digits(BigDenorm))),
-%    %% small normalized number
-%    %% 2.22507385850720138309e-308
-%    <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
-%    ?assertEqual("2.2250738585072014e-308",
-%                 digits(SmallNorm)),
-%    ?assertEqual(SmallNorm,
-%                 list_to_float(digits(SmallNorm))),
-%    %% large normalized number
-%    %% 1.79769313486231570815e+308
-%    <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
-%    ?assertEqual("1.7976931348623157e+308",
-%                 digits(LargeNorm)),
-%    ?assertEqual(LargeNorm,
-%                 list_to_float(digits(LargeNorm))),
-%    ok.
+digits_test() ->
+    ?assertEqual("0",
+                 digits(0)),
+    ?assertEqual("0.0",
+                 digits(0.0)),
+    ?assertEqual("1.0",
+                 digits(1.0)),
+    ?assertEqual("-1.0",
+                 digits(-1.0)),
+    ?assertEqual("0.1",
+                 digits(0.1)),
+    ?assertEqual("0.01",
+                 digits(0.01)),
+    ?assertEqual("0.001",
+                 digits(0.001)),
+    ?assertEqual("1.0e+6",
+                 digits(1000000.0)),
+    ?assertEqual("0.5",
+                 digits(0.5)),
+    ?assertEqual("4503599627370496.0",
+                 digits(4503599627370496.0)),
+    %% small denormalized number
+    %% 4.94065645841246544177e-324
+    <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
+    ?assertEqual("4.9406564584124654e-324",
+                 digits(SmallDenorm)),
+    ?assertEqual(SmallDenorm,
+                 list_to_float(digits(SmallDenorm))),
+    %% large denormalized number
+    %% 2.22507385850720088902e-308
+    <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
+    ?assertEqual("2.225073858507201e-308",
+                 digits(BigDenorm)),
+    ?assertEqual(BigDenorm,
+                 list_to_float(digits(BigDenorm))),
+    %% small normalized number
+    %% 2.22507385850720138309e-308
+    <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
+    ?assertEqual("2.2250738585072014e-308",
+                 digits(SmallNorm)),
+    ?assertEqual(SmallNorm,
+                 list_to_float(digits(SmallNorm))),
+    %% large normalized number
+    %% 1.79769313486231570815e+308
+    <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
+    ?assertEqual("1.7976931348623157e+308",
+                 digits(LargeNorm)),
+    ?assertEqual(LargeNorm,
+                 list_to_float(digits(LargeNorm))),
+    ok.
 
 frexp_test() ->
     %% zero

src/mochitemp.erl

+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc Create temporary files and directories. Requires crypto to be started.
+
+-module(mochitemp).
+-export([gettempdir/0]).
+-export([mkdtemp/0, mkdtemp/3]).
+-export([rmtempdir/1]).
+%% -export([mkstemp/4]).
+-define(SAFE_CHARS, {$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m,
+                     $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
+                     $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M,
+                     $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z,
+                     $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $_}).
+-define(TMP_MAX, 10000).
+
+-include_lib("kernel/include/file.hrl").
+
+%% TODO: An ugly wrapper over the mktemp tool with open_port and sadness?
+%%       We can't implement this race-free in Erlang without the ability
+%%       to issue O_CREAT|O_EXCL. I suppose we could hack something with
+%%       mkdtemp, del_dir, open.
+%% mkstemp(Suffix, Prefix, Dir, Options) ->
+%%    ok.
+
+%% Remove the directory Dir. If the first del_dir attempt reports
+%% {error, eexist}, the directory's entries are removed recursively via
+%% rmtempdirfiles/1 and the deletion is retried. Returns ok; any other
+%% result from file:del_dir/1 crashes the caller.
+rmtempdir(Dir) ->
+    case file:del_dir(Dir) of
+        ok ->
+            ok;
+        {error, eexist} ->
+            ok = rmtempdirfiles(Dir),
+            ok = file:del_dir(Dir)
+    end.
+
+%% Delete every entry found in Dir (but not Dir itself).
+rmtempdirfiles(Dir) ->
+    {ok, Basenames} = file:list_dir(Dir),
+    ok = rmtempdirfiles(Dir, Basenames).
+
+%% Delete each of Basenames inside Dir, in list order: entries for which
+%% filelib:is_dir/1 is true go through rmtempdir/1, everything else through
+%% file:delete/1. Crashes with badmatch on the first removal that fails.
+rmtempdirfiles(Dir, Basenames) ->
+    lists:foreach(
+      fun (Basename) ->
+              Path = filename:join([Dir, Basename]),
+              ok = case filelib:is_dir(Path) of
+                       true ->
+                           rmtempdir(Path);
+                       false ->
+                           file:delete(Path)
+                   end
+      end,
+      Basenames).
+
+%% @spec mkdtemp() -> string()
+%% @doc Create a new directory named "tmp" followed by random characters
+%%      inside gettempdir() and return its path.
+mkdtemp() ->
+    mkdtemp("", "tmp", gettempdir()).
+
+%% @spec mkdtemp(Suffix::string(), Prefix::string(), Dir::string()) -> string()
+%% @doc Create a new directory inside Dir whose name is Prefix, random
+%%      characters, then Suffix, and return its path. Up to ?TMP_MAX
+%%      candidate names are tried when a name already exists.
+mkdtemp(Suffix, Prefix, Dir) ->
+    %% rngpath_fun/3 expects Prefix first and Suffix second; passing them
+    %% in this function's own (Suffix, Prefix) order would swap them.
+    mkdtemp_n(rngpath_fun(Prefix, Suffix, Dir), ?TMP_MAX).
+
+
+
+%% Create a directory at a path produced by calling RngPath(), retrying
+%% with a fresh path whenever make_dir/1 throws {error, eexist}. N is the
+%% number of attempts left; on the final attempt the throw is not caught
+%% and propagates to the caller. Returns the path of the created directory.
+mkdtemp_n(RngPath, 1) ->
+    make_dir(RngPath());
+mkdtemp_n(RngPath, N) ->
+    try make_dir(RngPath()) of
+        Path ->
+            Path
+    catch
+        throw:{error, eexist} ->
+            mkdtemp_n(RngPath, N - 1)
+    end.
+
+%% Create the directory Path and restrict its mode to 8#0700. Returns Path.
+%% Throws {error, eexist} when Path already exists; any other failure of
+%% file:make_dir/1 crashes with case_clause.
+make_dir(Path) ->
+    case file:make_dir(Path) of
+        {error, eexist} = Exists ->
+            throw(Exists);
+        ok ->
+            ok
+    end,
+    %% Small window for a race condition here because dir is created 777
+    %% and only tightened by the call below.
+    OwnerOnly = #file_info{mode=8#0700},
+    ok = file:write_file_info(Path, OwnerOnly),
+    Path.
+
+%% Return a zero-arity fun that, each time it is called, builds a path in
+%% Dir whose basename is Prefix, six random characters, then Suffix.
+rngpath_fun(Prefix, Suffix, Dir) ->
+    fun () ->
+            Basename = lists:append([Prefix, rngchars(6), Suffix]),
+            filename:join([Dir, Basename])
+    end.
+
+%% Return a string of N characters, each drawn independently by rngchar/0.
+%% N must be a non-negative integer: the recursion only stops at exactly 0.
+rngchars(0) ->
+    "";
+rngchars(N) ->
+    [rngchar() | rngchars(N - 1)].
+
+%% Return one random character from ?SAFE_CHARS, using a 0-based index
+%% drawn from the range 0 .. tuple_size(?SAFE_CHARS) - 1.
+%% NOTE(review): crypto:rand_uniform/2 needs crypto to be started and has
+%% been deprecated in newer OTP releases -- confirm the target OTP version.
+rngchar() ->
+    rngchar(crypto:rand_uniform(0, tuple_size(?SAFE_CHARS))).
+
+%% Return the character at 0-based index C of ?SAFE_CHARS (element/2 is
+%% 1-based, hence the 1 + C).
+rngchar(C) ->
+    element(1 + C, ?SAFE_CHARS).
+
+%% @spec gettempdir() -> string()
+%% @doc Get a usable temporary directory using the first of these that is a directory:
+%%      $TMPDIR, $TMP, $TEMP, "/tmp", "/var/tmp", "/usr/tmp", ".".
+gettempdir() ->
+    gettempdir(gettempdir_checks(), fun normalize_dir/1).
+
+%% The ordered list of {Fun, Args} checks used by gettempdir/0. Each Fun is
+%% applied to each of its Args in turn to produce a candidate directory:
+%% environment variables first, then fixed paths, then the current
+%% working directory.
+gettempdir_checks() ->
+    [{fun os:getenv/1, ["TMPDIR", "TMP", "TEMP"]},
+     {fun gettempdir_identity/1, ["/tmp", "/var/tmp", "/usr/tmp"]},
+     {fun gettempdir_cwd/1, [cwd]}].
+
+%% Candidate producer for literal paths: returns its argument unchanged.
+gettempdir_identity(L) ->
+    L.
+
+%% Candidate producer for the current working directory. Only accepts the
+%% atom cwd; crashes with badmatch if file:get_cwd/0 fails.
+gettempdir_cwd(cwd) ->
+    {ok, L} = file:get_cwd(),
+    L.
+
+%% Walk the list of {Getter, Candidates} checks in order and return the
+%% first value of Normalize(Getter(Candidate)) that is not false. A check
+%% whose candidate list is exhausted is dropped and the next one is tried.
+%% There is no clause for an empty check list, so the call crashes with
+%% function_clause when no candidate is accepted.
+gettempdir([{_Getter, []} | Checks], Normalize) ->
+    gettempdir(Checks, Normalize);
+gettempdir([{Getter, [Candidate | Candidates]} | Checks], Normalize) ->
+    Raw = Getter(Candidate),
+    case Normalize(Raw) of
+        false ->
+            gettempdir([{Getter, Candidates} | Checks], Normalize);
+        Dir ->
+            Dir
+    end.
+
+%% Turn a candidate temp dir into an absolute path, or false when it is
+%% unusable. false (unset environment variable) and "" (a variable set to
+%% the empty string -- Erlang doesn't have an unsetenv, wtf) are rejected
+%% outright; anything else is made absolute and must be an existing
+%% directory.
+normalize_dir(false) ->
+    false;
+normalize_dir("") ->
+    false;
+normalize_dir(L) ->
+    Dir = filename:absname(L),
+    case filelib:is_dir(Dir) of
+        true ->
+            Dir;
+        false ->
+            false
+    end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+pushenv(L) ->
+    [{K, os:getenv(K)} || K <- L].
+popenv(L) ->
+    F = fun ({K, false}) ->
+                %% Erlang doesn't have an unsetenv, wtf.
+                os:putenv(K, "");
+            ({K, V}) ->
+                os:putenv(K, V)
+        end,
+    lists:foreach(F, L).
+
+gettempdir_fallback_test() ->
+    ?assertEqual(
+       "/",
+       gettempdir([{fun gettempdir_identity/1, ["/--not-here--/"]},
+                   {fun gettempdir_identity/1, ["/"]}],
+                  fun normalize_dir/1)),
+    ?assertEqual(
+       "/",
+       %% simulate a true os:getenv unset env
+       gettempdir([{fun gettempdir_identity/1, [false]},
+                   {fun gettempdir_identity/1, ["/"]}],
+                  fun normalize_dir/1)),
+    ok.
+
+gettempdir_identity_test() ->
+    ?assertEqual(
+       "/",
+       gettempdir([{fun gettempdir_identity/1, ["/"]}], fun normalize_dir/1)),
+    ok.
+
+gettempdir_cwd_test() ->
+    {ok, Cwd} = file:get_cwd(),
+    ?assertEqual(
+       normalize_dir(Cwd),
+       gettempdir([{fun gettempdir_cwd/1, [cwd]}], fun normalize_dir/1)),
+    ok.
+
+rngchars_test() ->
+    crypto:start(),
+    ?assertEqual(
+       "",
+       rngchars(0)),
+    ?assertEqual(
+       10,
+       length(rngchars(10))),
+    ok.
+
+rngchar_test() ->
+    ?assertEqual(
+       $a,
+       rngchar(0)),
+    ?assertEqual(
+       $A,
+       rngchar(26)),
+    ?assertEqual(
+       $_,
+       rngchar(62)),
+    ok.
+
+mkdtemp_n_failonce_test() ->
+    crypto:start(),
+    D = mkdtemp(),
+    Path = filename:join([D, "testdir"]),
+    %% Toggle the existence of a dir so that it fails
+    %% the first time and succeeds the second.
+    F = fun () ->
+                case filelib:is_dir(Path) of
+                    true ->
+                        file:del_dir(Path);
+                    false ->
+                        file:make_dir(Path)
+                end,
+                Path
+        end,
+    try
+        %% Fails the first time
+        ?assertThrow(
+           {error, eexist},
+           mkdtemp_n(F, 1)),
+        %% Reset state
+        file:del_dir(Path),
+        %% Succeeds the second time
+        ?assertEqual(
+           Path,
+           mkdtemp_n(F, 2))
+    after rmtempdir(D)
+    end,
+    ok.
+
+mkdtemp_n_fail_test() ->
+    {ok, Cwd} = file:get_cwd(),
+    ?assertThrow(
+       {error, eexist},
+       mkdtemp_n(fun () -> Cwd end, 1)),
+    ?assertThrow(
+       {error, eexist},
+       mkdtemp_n(fun () -> Cwd end, 2)),
+    ok.
+
+make_dir_fail_test() ->
+    {ok, Cwd} = file:get_cwd(),
+    ?assertThrow(
+      {error, eexist},
+      make_dir(Cwd)),
+    ok.
+
+mkdtemp_test() ->
+    crypto:start(),
+    D = mkdtemp(),
+    ?assertEqual(
+       true,
+       filelib:is_dir(D)),
+    ?assertEqual(
+       ok,
+       file:del_dir(D)),
+    ok.
+
+rmtempdir_test() ->
+    crypto:start(),
+    D1 = mkdtemp(),
+    ?assertEqual(
+       true,
+       filelib:is_dir(D1)),
+    ?assertEqual(
+       ok,
+       rmtempdir(D1)),
+    D2 = mkdtemp(),
+    ?assertEqual(
+       true,
+       filelib:is_dir(D2)),
+    ok = file:write_file(filename:join([D2, "foo"]), <<"bytes">>),
+    D3 = mkdtemp("suffix", "prefix", D2),
+    ?assertEqual(
+       true,
+       filelib:is_dir(D3)),
+    ok = file:write_file(filename:join([D3, "foo"]), <<"bytes">>),
+    ?assertEqual(
+       ok,
+       rmtempdir(D2)),
+    ?assertEqual(
+       {error, enoent},
+       file:consult(D3)),
+    ?assertEqual(
+       {error, enoent},
+       file:consult(D2)),
+    ok.
+
+gettempdir_env_test() ->
+    Env = pushenv(["TMPDIR", "TEMP", "TMP"]),
+    FalseEnv = [{"TMPDIR", false}, {"TEMP", false}, {"TMP", false}],
+    try
+        popenv(FalseEnv),
+        popenv([{"TMPDIR", "/"}]),
+        ?assertEqual(
+           "/",
+           os:getenv("TMPDIR")),
+        ?assertEqual(
+           "/",
+           gettempdir()),
+        {ok, Cwd} = file:get_cwd(),
+        popenv(FalseEnv),
+        popenv([{"TMP", Cwd}]),
+        ?assertEqual(
+           normalize_dir(Cwd),
+           gettempdir())
+    after popenv(Env)
+    end,
+    ok.
+
+-endif.

src/mochiutf8.erl

+%% @copyright 2010 Mochi Media, Inc.
+%% @author Bob Ippolito <bob@mochimedia.com>
+
+%% @doc Algorithm to convert any binary to a valid UTF-8 sequence by ignoring
+%%      invalid bytes.
+
+-module(mochiutf8).
+%% codepoints_to_bytes/1 is specified and documented as part of the
+%% external API below, so it is exported alongside codepoint_to_bytes/1.
+-export([valid_utf8_bytes/1, codepoint_to_bytes/1, codepoints_to_bytes/1]).
+-export([bytes_to_codepoints/1]).
+-export([bytes_foldl/3, codepoint_foldl/3, read_codepoint/1, len/1]).
+
+%% External API
+
+-type unichar_low() :: 0..16#d7ff.
+-type unichar_high() :: 16#e000..16#10ffff.
+-type unichar() :: unichar_low() | unichar_high().
+
+-spec codepoint_to_bytes(unichar()) -> binary().
+%% @doc Encode one Unicode codepoint as its 1-4 byte UTF-8 sequence.
+%%      Surrogate code points (U+D800 - U+DFFF) and values above U+10FFFF
+%%      match no clause.
+codepoint_to_bytes(C) when C >= 16#00, C =< 16#7f ->
+    %% U+0000 - U+007F - 7 bits, encoded as a single byte
+    <<C>>;
+codepoint_to_bytes(C) when C >= 16#080, C =< 16#07FF ->
+    %% U+0080 - U+07FF - 11 bits, split 5 + 6 over two bytes
+    <<0:5, Hi:5, Lo:6>> = <<C:16>>,
+    <<2#110:3, Hi:5,
+      2#10:2, Lo:6>>;
+codepoint_to_bytes(C) when C >= 16#0800, C =< 16#FFFF,
+                           (C < 16#D800 orelse C > 16#DFFF) ->
+    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points),
+    %% split 4 + 6 + 6 over three bytes
+    <<Hi:4, Mid:6, Lo:6>> = <<C:16>>,
+    <<2#1110:4, Hi:4,
+      2#10:2, Mid:6,
+      2#10:2, Lo:6>>;
+codepoint_to_bytes(C) when C >= 16#010000, C =< 16#10FFFF ->
+    %% U+10000 - U+10FFFF - 21 bits, split 3 + 6 + 6 + 6 over four bytes
+    <<0:3, Top:3, Hi:6, Mid:6, Lo:6>> = <<C:24>>,
+    <<2#11110:5, Top:3,
+      2#10:2, Hi:6,
+      2#10:2, Mid:6,
+      2#10:2, Lo:6>>.
+
+-spec codepoints_to_bytes([unichar()]) -> binary().
+%% @doc Encode a list of codepoints as one UTF-8 binary by concatenating
+%%      the codepoint_to_bytes/1 encoding of each element, in order.
+codepoints_to_bytes(L) ->
+    iolist_to_binary([codepoint_to_bytes(C) || C <- L]).
+
+-spec read_codepoint(binary()) -> {unichar(), binary(), binary()}.
+%% @doc Decode the UTF-8 sequence at the start of the binary. Returns
+%%      {Codepoint, Bytes, Rest}, where Bytes is the 1-4 byte encoding
+%%      that was consumed and Rest is the remainder of the input.
+%%      There is no error return: input that does not start with a
+%%      well-formed sequence matches no clause (function_clause), and an
+%%      overlong encoding, a surrogate or a value above U+10FFFF fails the
+%%      guarded case (case_clause).
+read_codepoint(Bin = <<2#0:1, C:7, Rest/binary>>) ->
+    %% U+0000 - U+007F - 7 bits
+    <<B:1/binary, _/binary>> = Bin,
+    {C, B, Rest};
+read_codepoint(Bin = <<2#110:3, B1:5,
+                       2#10:2, B0:6,
+                       Rest/binary>>) ->
+    %% U+0080 - U+07FF - 11 bits
+    case <<B1:5, B0:6>> of
+        %% C >= 16#80 rejects overlong encodings of 7-bit values
+        <<C:11>> when C >= 16#80 ->
+            <<B:2/binary, _/binary>> = Bin,
+            {C, B, Rest}
+    end;
+read_codepoint(Bin = <<2#1110:4, B2:4,
+                       2#10:2, B1:6,
+                       2#10:2, B0:6,
+                       Rest/binary>>) ->
+    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+    case <<B2:4, B1:6, B0:6>> of
+        <<C:16>> when (C >= 16#0800 andalso C =< 16#FFFF) andalso
+                      (C < 16#D800 orelse C > 16#DFFF) ->
+            <<B:3/binary, _/binary>> = Bin,
+            {C, B, Rest}
+    end;
+read_codepoint(Bin = <<2#11110:5, B3:3,
+                       2#10:2, B2:6,
+                       2#10:2, B1:6,
+                       2#10:2, B0:6,
+                       Rest/binary>>) ->
+    %% U+10000 - U+10FFFF - 21 bits
+    case <<B3:3, B2:6, B1:6, B0:6>> of
+        <<C:21>> when (C >= 16#010000 andalso C =< 16#10FFFF) ->
+            <<B:4/binary, _/binary>> = Bin,
+            {C, B, Rest}
+    end.
+
+-spec codepoint_foldl(fun((unichar(), _) -> _), _, binary()) -> _.
+%% @doc Left fold over the codepoints of a UTF-8 binary: F(Codepoint, Acc)
+%%      is called for each codepoint in order and the final accumulator is
+%%      returned. Decoding is done by read_codepoint/1.
+codepoint_foldl(F, Acc, <<>>) when is_function(F, 2) ->
+    Acc;
+codepoint_foldl(F, Acc0, Bin) ->
+    {Codepoint, _Bytes, Tail} = read_codepoint(Bin),
+    Acc1 = F(Codepoint, Acc0),
+    codepoint_foldl(F, Acc1, Tail).
+
+-spec bytes_foldl(fun((binary(), _) -> _), _, binary()) -> _.
+%% @doc Left fold over a UTF-8 binary one codepoint at a time, passing F
+%%      the encoded bytes of each codepoint (a 1-4 byte binary) rather than
+%%      its numeric value. Decoding is done by read_codepoint/1.
+bytes_foldl(F, Acc, <<>>) when is_function(F, 2) ->
+    Acc;
+bytes_foldl(F, Acc0, Bin) ->
+    {_Codepoint, Bytes, Tail} = read_codepoint(Bin),
+    Acc1 = F(Bytes, Acc0),
+    bytes_foldl(F, Acc1, Tail).
+
+-spec bytes_to_codepoints(binary()) -> [unichar()].
+%% @doc Decode a UTF-8 binary into the list of its codepoints, in order.
+bytes_to_codepoints(B) ->
+    Prepend = fun (C, Acc) -> [C | Acc] end,
+    Reversed = codepoint_foldl(Prepend, [], B),
+    lists:reverse(Reversed).
+
+-spec len(binary()) -> non_neg_integer().
+%% @doc Count the codepoints in a UTF-8 binary, decoding each one with
+%%      read_codepoint/1.
+len(B) ->
+    len(B, 0).
+
+%% @private Count is the number of codepoints consumed so far.
+len(<<>>, Count) ->
+    Count;
+len(B, Count) ->
+    {_Codepoint, _Bytes, Rest} = read_codepoint(B),
+    len(Rest, 1 + Count).
+
+-spec valid_utf8_bytes(B::binary()) -> binary().
+%% @doc Return B with every byte that is not part of valid UTF-8 removed.
+%%      A byte that does not start a syntactically correct 1-4 byte
+%%      sequence is dropped on its own; a syntactically correct 2-4 byte
+%%      sequence is dropped as a whole when it is an overlong encoding or
+%%      encodes a bad code point (surrogate U+D800 - U+DFFF or > U+10FFFF).
+valid_utf8_bytes(B) when is_binary(B) ->
+    BadIndexes = invalid_utf8_indexes(B),
+    binary_skip_bytes(B, BadIndexes).
+
+%% Internal API
+
+-spec binary_skip_bytes(binary(), [non_neg_integer()]) -> binary().
+%% @doc Return B with the bytes at the 0-based indexes in L removed.
+%%      Indexes are consumed from the head of L as the scan position
+%%      advances, so L is expected to be in ascending order and within B.
+binary_skip_bytes(B, []) ->
+    B;
+binary_skip_bytes(B, L) ->
+    binary_skip_bytes(B, L, 0, <<>>).
+
+%% @private
+%% Pos is the index of the first byte of the remaining input; Kept holds
+%% the bytes retained so far.
+-spec binary_skip_bytes(binary(), [non_neg_integer()], non_neg_integer(), binary()) -> binary().
+binary_skip_bytes(Tail, [], _Pos, Kept) ->
+    %% Nothing left to skip: the rest of the input is kept verbatim.
+    <<Kept/binary, Tail/binary>>;
+binary_skip_bytes(<<_Skipped, Tail/binary>>, [Pos | Indexes], Pos, Kept) ->
+    binary_skip_bytes(Tail, Indexes, 1 + Pos, Kept);
+binary_skip_bytes(<<Byte, Tail/binary>>, Indexes, Pos, Kept) ->
+    binary_skip_bytes(Tail, Indexes, 1 + Pos, <<Kept/binary, Byte>>).
+
+-spec invalid_utf8_indexes(binary()) -> [non_neg_integer()].
+%% @doc Return, in ascending order, the 0-based indexes of the bytes in B
+%%      that are not part of valid UTF-8.
+invalid_utf8_indexes(B) ->
+    invalid_utf8_indexes(B, 0, []).
+
+%% @private
+%% N is the index of the first byte of the remaining input; Acc collects
+%% the invalid indexes in reverse order. Clause order matters: well-formed
+%% sequences are tried from shortest to longest before falling back to
+%% flagging a single byte.
+-spec invalid_utf8_indexes(binary(), non_neg_integer(), [non_neg_integer()]) -> [non_neg_integer()].
+invalid_utf8_indexes(<<2#0:1, _:7, Rest/binary>>, N, Acc) ->
+    %% U+0000 - U+007F - 7 bits
+    invalid_utf8_indexes(Rest, 1 + N, Acc);
+invalid_utf8_indexes(<<2#110:3, B1:5,
+                       2#10:2, B0:6,
+                       Rest/binary>>, N, Acc) ->
+    %% U+0080 - U+07FF - 11 bits
+    case (B1 bsl 6) bor B0 of
+        C when C < 16#80 ->
+            %% Overlong encoding.
+            invalid_utf8_indexes(Rest, 2 + N, [1 + N, N | Acc]);
+        _ ->
+            %% Upper bound U+07FF does not need to be checked
+            invalid_utf8_indexes(Rest, 2 + N, Acc)
+    end;
+invalid_utf8_indexes(<<2#1110:4, B2:4,
+                       2#10:2, B1:6,
+                       2#10:2, B0:6,
+                       Rest/binary>>, N, Acc) ->
+    %% U+0800 - U+FFFF - 16 bits
+    case (B2 bsl 12) bor (B1 bsl 6) bor B0 of
+        C when (C < 16#800) orelse (C >= 16#D800 andalso C =< 16#DFFF) ->
+            %% Overlong encoding or surrogate.
+            invalid_utf8_indexes(Rest, 3 + N, [2 + N, 1 + N, N | Acc]);
+        _ ->
+            %% Upper bound U+FFFF does not need to be checked
+            invalid_utf8_indexes(Rest, 3 + N, Acc)
+    end;
+invalid_utf8_indexes(<<2#11110:5, B3:3,
+                       2#10:2, B2:6,
+                       2#10:2, B1:6,
+                       2#10:2, B0:6,
+                       Rest/binary>>, N, Acc) ->
+    %% U+10000 - U+10FFFF - 21 bits
+    case (B3 bsl 18) bor (B2 bsl 12) bor (B1 bsl 6) bor B0 of
+        C when (C < 16#10000) orelse (C > 16#10FFFF) ->
+            %% Overlong encoding or invalid code point.
+            invalid_utf8_indexes(Rest, 4 + N, [3 + N, 2 + N, 1 + N, N | Acc]);
+        _ ->
+            invalid_utf8_indexes(Rest, 4 + N, Acc)
+    end;
+invalid_utf8_indexes(<<_, Rest/binary>>, N, Acc) ->
+    %% Invalid char
+    invalid_utf8_indexes(Rest, 1 + N, [N | Acc]);
+invalid_utf8_indexes(<<>>, _N, Acc) ->
+    lists:reverse(Acc).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+binary_skip_bytes_test() ->
+    ?assertEqual(<<"foo">>,
+                 binary_skip_bytes(<<"foo">>, [])),
+    ?assertEqual(<<"foobar">>,
+                 binary_skip_bytes(<<"foo bar">>, [3])),
+    ?assertEqual(<<"foo">>,
+                 binary_skip_bytes(<<"foo bar">>, [3, 4, 5, 6])),
+    ?assertEqual(<<"oo bar">>,
+                 binary_skip_bytes(<<"foo bar">>, [0])),
+    ok.
+
+invalid_utf8_indexes_test() ->
+    ?assertEqual(
+       [],
+       invalid_utf8_indexes(<<"unicode snowman for you: ", 226, 152, 131>>)),
+    ?assertEqual(
+       [0],
+       invalid_utf8_indexes(<<128>>)),
+    ?assertEqual(
+       [57,59,60,64,66,67],
+       invalid_utf8_indexes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
+                              167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
+    ok.
+
+codepoint_to_bytes_test() ->
+    %% U+0000 - U+007F - 7 bits
+    %% U+0080 - U+07FF - 11 bits
+    %% U+0800 - U+FFFF - 16 bits (excluding UTF-16 surrogate code points)
+    %% U+10000 - U+10FFFF - 21 bits
+    ?assertEqual(
+       <<"a">>,
+       codepoint_to_bytes($a)),
+    ?assertEqual(
+       <<16#c2, 16#80>>,
+       codepoint_to_bytes(16#80)),
+    ?assertEqual(
+       <<16#df, 16#bf>>,
+       codepoint_to_bytes(16#07ff)),
+    ?assertEqual(
+       <<16#ef, 16#bf, 16#bf>>,
+       codepoint_to_bytes(16#ffff)),
+    ?assertEqual(
+       <<16#f4, 16#8f, 16#bf, 16#bf>>,
+       codepoint_to_bytes(16#10ffff)),
+    ok.
+
+bytes_foldl_test() ->
+    ?assertEqual(
+       <<"abc">>,
+       bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>, <<"abc">>)),
+    ?assertEqual(
+       <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>,
+       bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>,
+                   <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+bytes_to_codepoints_test() ->
+    ?assertEqual(
+       "abc" ++ [16#2603, 16#4e2d, 16#85, 16#10ffff],
+       bytes_to_codepoints(<<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+codepoint_foldl_test() ->
+    ?assertEqual(
+       "cba",
+       codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], <<"abc">>)),
+    ?assertEqual(
+       [16#10ffff, 16#85, 16#4e2d, 16#2603 | "cba"],
+       codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [],
+                       <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
+    ok.
+
+len_test() ->
+    ?assertEqual(
+       29,
+       len(<<"unicode snowman for you: ", 226, 152, 131, 228, 184, 173, 194, 133, 244, 143, 191, 191>>)),
+    ok.
+
+codepoints_to_bytes_test() ->
+    ?assertEqual(
+       iolist_to_binary(lists:map(fun codepoint_to_bytes/1, lists:seq(1, 1000))),
+       codepoints_to_bytes(lists:seq(1, 1000))),
+    ok.
+
+valid_utf8_bytes_test() ->
+    ?assertEqual(
+       <<"invalid U+11ffff: ">>,
+       valid_utf8_bytes(<<"invalid U+11ffff: ", 244, 159, 191, 191>>)),
+    ?assertEqual(
+       <<"U+10ffff: ", 244, 143, 191, 191>>,
+       valid_utf8_bytes(<<"U+10ffff: ", 244, 143, 191, 191>>)),
+    ?assertEqual(
+       <<"overlong 2-byte encoding (a): ">>,
+       valid_utf8_bytes(<<"overlong 2-byte encoding (a): ", 2#11000001, 2#10100001>>)),
+    ?assertEqual(
+       <<"overlong 2-byte encoding (!): ">>,
+       valid_utf8_bytes(<<"overlong 2-byte encoding (!): ", 2#11000000, 2#10100001>>)),
+    ?assertEqual(
+       <<"mu: ", 194, 181>>,
+       valid_utf8_bytes(<<"mu: ", 194, 181>>)),
+    ?assertEqual(
+       <<"bad coding bytes: ">>,
+       valid_utf8_bytes(<<"bad coding bytes: ", 2#10011111, 2#10111111, 2#11111111>>)),
+    ?assertEqual(
+       <<"low surrogate (unpaired): ">>,
+       valid_utf8_bytes(<<"low surrogate (unpaired): ", 237, 176, 128>>)),
+    ?assertEqual(
+       <<"high surrogate (unpaired): ">>,
+       valid_utf8_bytes(<<"high surrogate (unpaired): ", 237, 191, 191>>)),
+    ?assertEqual(
+       <<"unicode snowman for you: ", 226, 152, 131>>,
+       valid_utf8_bytes(<<"unicode snowman for you: ", 226, 152, 131>>)),
+    ?assertEqual(
+       <<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (AISPW))">>,
+       valid_utf8_bytes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
+                          167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
+    ok.
+
+-endif.
 -include_lib("eunit/include/eunit.hrl").
 -ifdef(TEST).
 
+-record(treq, {path, body= <<>>, xreply= <<>>}).
+
+%% Build the {certfile, _} and {keyfile, _} options for the SSL tests. The
+%% files are looked up in ../support/test-materials relative to the
+%% directory that holds this module's loaded code.
+ssl_cert_opts() ->
+    ModuleDir = filename:dirname(code:which(?MODULE)),
+    Materials = filename:join([ModuleDir, "..", "support", "test-materials"]),
+    [{certfile, filename:join(Materials, "test_ssl_cert.pem")},
+     {keyfile, filename:join(Materials, "test_ssl_key.pem")}].
+
+%% Start a mochiweb_http server on 127.0.0.1 with an OS-assigned port
+%% ({port, 0}), run ClientFun(Transport, Port) against it and return the
+%% client's result.
+%%
+%% Transport is `plain' or `ssl'; for `ssl' the server is started with the
+%% test certificate from ssl_cert_opts/0. ServerFun is the request loop.
+%%
+%% The server is stopped in an `after' clause, so it is shut down even when
+%% the port lookup or the client fails. A failing client propagates its
+%% original exception (class, reason and stack trace) instead of being
+%% flattened into an ordinary return value.
+with_server(Transport, ServerFun, ClientFun) ->
+    ServerOpts0 = [{ip, "127.0.0.1"}, {port, 0}, {loop, ServerFun}],
+    ServerOpts = case Transport of
+        plain ->
+            ServerOpts0;
+        ssl ->
+            ServerOpts0 ++ [{ssl, true}, {ssl_opts, ssl_cert_opts()}]
+    end,
+    {ok, Server} = mochiweb_http:start(ServerOpts),
+    try
+        Port = mochiweb_socket_server:get(Server, port),
+        ClientFun(Transport, Port)
+    after
+        mochiweb_http:stop(Server)
+    end.
+
 %% get(path) must return the percent/plus-decoded path with the query
 %% string removed.
 request_test() ->
     RawPath = "/foo/bar/baz%20wibble+quux?qs=2",
     Req = mochiweb_request:new(z, z, RawPath, z, []),
     Decoded = Req:get(path),
     "/foo/bar/baz wibble quux" = Decoded,
     ok.
 
+single_http_GET_test() ->
+    do_GET(plain, 1).               % 1 GET, plain TCP
+
+single_https_GET_test() ->
+    do_GET(ssl, 1).                 % 1 GET, SSL
+
+multiple_http_GET_test() ->
+    do_GET(plain, 3).               % 3 GETs on one connection, plain TCP
+
+multiple_https_GET_test() ->
+    do_GET(ssl, 3).                 % 3 GETs on one connection, SSL
+
+hundred_http_GET_test() ->
+    do_GET(plain, 100).             % 100 GETs on one connection, plain TCP
+
+hundred_https_GET_test() ->
+    do_GET(ssl, 100).               % 100 GETs on one connection, SSL
+
+single_128_http_POST_test() ->
+    do_POST(plain, 128, 1).         % 1 POST, 128-byte body, plain TCP
+
+single_128_https_POST_test() ->
+    do_POST(ssl, 128, 1).           % 1 POST, 128-byte body, SSL
+
+single_2k_http_POST_test() ->
+    do_POST(plain, 2048, 1).        % 1 POST, 2048-byte body, plain TCP
+
+single_2k_https_POST_test() ->
+    do_POST(ssl, 2048, 1).          % 1 POST, 2048-byte body, SSL
+
+single_100k_http_POST_test() ->
+    do_POST(plain, 102400, 1).      % 1 POST, 102400-byte body, plain TCP
+
+single_100k_https_POST_test() ->
+    do_POST(ssl, 102400, 1).        % 1 POST, 102400-byte body, SSL
+
+multiple_100k_http_POST_test() ->
+    do_POST(plain, 102400, 3).      % 3 POSTs, 102400-byte bodies, plain TCP
+
+multiple_100K_https_POST_test() ->
+    do_POST(ssl, 102400, 3).        % 3 POSTs, 102400-byte bodies, SSL
+
+hundred_128_http_POST_test() ->
+    do_POST(plain, 128, 100).       % 100 POSTs, 128-byte bodies, plain TCP
+
+hundred_128_https_POST_test() ->
+    do_POST(ssl, 128, 100).         % 100 POSTs, 128-byte bodies, SSL
+
+%% Issue Times GET requests for /whatever/1 .. /whatever/Times over one
+%% connection. The server answers each with "You requested: " followed by
+%% the request path, and the client checks that exact body.
+do_GET(Transport, Times) ->
+    PathPrefix = "/whatever/",
+    ReplyPrefix = "You requested: ",
+    EchoPath = fun (Req) ->
+                       Req:ok({"text/plain", ReplyPrefix ++ Req:get(path)})
+               end,
+    MakeReq = fun (N) ->
+                      Path = PathPrefix ++ integer_to_list(N),
+                      #treq{path=Path,
+                            xreply=list_to_binary(ReplyPrefix ++ Path)}
+              end,
+    TestReqs = lists:map(MakeReq, lists:seq(1, Times)),
+    ok = with_server(Transport, EchoPath, new_client_fun('GET', TestReqs)),
+    ok.
+
+%% Issue Times POST requests to /stuff/1 .. /stuff/Times over one
+%% connection, each with a fresh random body of Size bytes. The server
+%% replies 201 with the received body, and the client checks it gets the
+%% same bytes back.
+do_POST(Transport, Size, Times) ->
+    EchoBody = fun (Req) ->
+                       Received = Req:recv_body(),
+                       Req:respond({201,
+                                    [{"Content-Type", "application/octet-stream"}],
+                                    Received})
+               end,
+    MakeReq = fun (N) ->
+                      Payload = crypto:rand_bytes(Size),
+                      #treq{path="/stuff/" ++ integer_to_list(N),
+                            body=Payload,
+                            xreply=Payload}
+              end,
+    TestReqs = lists:map(MakeReq, lists:seq(1, Times)),
+    ok = with_server(Transport, EchoBody, new_client_fun('POST', TestReqs)),
+    ok.
+
+%% Build the two-argument client callback expected by with_server/3: it
+%% closes over Method and TestReqs and forwards to client_request/4.
+new_client_fun(Method, TestReqs) ->
+    Client = fun (Transport, Port) ->
+                     client_request(Transport, Port, Method, TestReqs)
+             end,
+    Client.
+
+%% Connect to 127.0.0.1:Port over the given transport (passive, binary,
+%% {packet, http}) and run all TestReqs over that one connection.
+%%
+%% The socket is wrapped in a fun so client_request/3 does not need to know
+%% the transport. The fun accepts `recv', {recv, Length}, {send, Data} and
+%% {setopts, Opts}.
+client_request(Transport, Port, Method, TestReqs) ->
+    Host = "127.0.0.1",
+    BaseOpts = [binary, {active, false}, {packet, http}],
+    SockFun = case Transport of
+        plain ->
+            {ok, Tcp} = gen_tcp:connect(Host, Port, BaseOpts),
+            fun ({send, Data}) ->
+                    gen_tcp:send(Tcp, Data);
+                ({recv, Length}) ->
+                    gen_tcp:recv(Tcp, Length);
+                (recv) ->
+                    gen_tcp:recv(Tcp, 0);
+                ({setopts, L}) ->
+                    inet:setopts(Tcp, L)
+            end;
+        ssl ->
+            {ok, Ssl} = ssl:connect(Host, Port, [{ssl_imp, new} | BaseOpts]),
+            fun ({send, Data}) ->
+                    ssl:send(Ssl, Data);
+                ({recv, Length}) ->
+                    ssl:recv(Ssl, Length);
+                (recv) ->
+                    ssl:recv(Ssl, 0);
+                ({setopts, L}) ->
+                    ssl:setopts(Ssl, L)
+            end
+    end,
+    client_request(SockFun, Method, TestReqs).
+
+%% Send each remaining test request in turn and verify the response.
+%%
+%% For every request: check the status line (200 OK for GET, 201 Created for
+%% POST), then the Server, Date, Content-Type and Content-Length headers in
+%% exactly that order, then read Content-Length bytes and compare them with
+%% the expected reply. Once the list is empty the next recv must report
+%% {error, closed}, because the last request carried "Connection: close".
+client_request(SockFun, _Method, []) ->
+    AfterLast = SockFun(recv),
+    {the_end, {error, closed}} = {the_end, AfterLast},
+    ok;
+client_request(SockFun, Method,
+               [#treq{path=Path, body=Body, xreply=Expected} | Rest]) ->
+    Headers = client_headers(Body, Rest =:= []),
+    ok = SockFun({send, [atom_to_list(Method), " ", Path, " HTTP/1.1\r\n",
+                         Headers,
+                         "\r\n",
+                         Body]}),
+    {Status, Phrase} = case Method of
+        'GET' ->
+            {200, "OK"};
+        'POST' ->
+            {201, "Created"}
+    end,
+    %% Status and Phrase are already bound, so this is an equality check.
+    {ok, {http_response, {1,1}, Status, Phrase}} = SockFun(recv),
+    %% Switch the decoder to header lines for the rest of the head.
+    ok = SockFun({setopts, [{packet, httph}]}),
+    {ok, {http_header, _, 'Server', _, "MochiWeb" ++ _}} = SockFun(recv),
+    {ok, {http_header, _, 'Date', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Type', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Length', _, LengthStr}} = SockFun(recv),
+    {ok, http_eoh} = SockFun(recv),
+    %% The body is read as raw bytes.
+    ok = SockFun({setopts, [{packet, raw}]}),
+    Payload = drain_reply(SockFun, list_to_integer(LengthStr), <<>>),
+    {payload, Expected} = {payload, Payload},
+    %% Back to request/response-line decoding for the next round.
+    ok = SockFun({setopts, [{packet, http}]}),
+    client_request(SockFun, Method, Rest).
+
+%% Request headers as an iolist: always a Host header, Content-Type and
+%% Content-Length only when Body is non-empty, and "Connection: close" only
+%% on the last request of the connection.
+client_headers(Body, IsLastRequest) ->
+    EntityHeaders = case Body of
+        <<>> ->
+            "";
+        _ ->
+            ["Content-Type: application/octet-stream\r\n",
+             "Content-Length: ", integer_to_list(byte_size(Body)), "\r\n"]
+    end,
+    ConnectionHeader = case IsLastRequest of
+        true ->
+            "Connection: close\r\n";
+        false ->
+            ""
+    end,
+    ["Host: localhost\r\n", EntityHeaders, ConnectionHeader].
+
+%% Read exactly Length more bytes through SockFun, at most 1024 per recv,
+%% appending them to Acc; returns the accumulated binary.
+drain_reply(_SockFun, 0, Acc) ->
+    Acc;
+drain_reply(SockFun, Length, Acc) ->
+    ChunkSize = if
+                    Length > 1024 -> 1024;
+                    true -> Length
+                end,
+    {ok, Chunk} = SockFun({recv, ChunkSize}),
+    drain_reply(SockFun, Length - ChunkSize, <<Acc/bytes, Chunk/bytes>>).
+
 -endif.

src/mochiweb_acceptor.erl

+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc MochiWeb acceptor.
+
+-module(mochiweb_acceptor).
+-author('bob@mochimedia.com').
+
+-include("internal.hrl").
+
+-export([start_link/3, init/3]).
+
+%% Spawn a linked acceptor process that runs init/3 with the given server,
+%% listen socket and loop; returns the new process's pid.
+start_link(Server, Listen, Loop) ->
+    InitArgs = [Server, Listen, Loop],
+    proc_lib:spawn_link(?MODULE, init, InitArgs).
+
+%% Acceptor process body: wait for one connection on Listen.
+%%
+%% On success, cast {accepted, self(), Micros} to Server, where Micros is
+%% the time spent waiting in accept as measured by timer:now_diff/2, then
+%% hand the socket to Loop. The `closed', `timeout' and `esslaccept' errors
+%% end the process with reason `normal'. Anything else, including a crash
+%% inside accept (captured by `catch'), is logged and the process exits with
+%% {error, accept_failed}.
+init(Server, Listen, Loop) ->
+    Started = now(),
+    Accepted = (catch mochiweb_socket:accept(Listen)),
+    case Accepted of
+        {ok, Socket} ->
+            Waited = timer:now_diff(now(), Started),
+            gen_server:cast(Server, {accepted, self(), Waited}),
+            call_loop(Loop, Socket);
+        {error, Reason} when Reason =:= closed;
+                             Reason =:= timeout;
+                             Reason =:= esslaccept ->
+            exit(normal);
+        Other ->
+            Details = lists:flatten(io_lib:format("~p", [Other])),
+            error_logger:error_report(
+              [{application, mochiweb},
+               "Accept failed error",
+               Details]),
+            exit({error, accept_failed})
+    end.
+
+%% Invoke the connection handler with Socket as its first argument.
+%% Loop is {Module, Function}, {Module, Function, ExtraArgs} or a fun of
+%% arity 1; ExtraArgs are appended after Socket.
+call_loop({Mod, Fun}, Socket) ->
+    call_loop({Mod, Fun, []}, Socket);
+call_loop({Mod, Fun, ExtraArgs}, Socket) ->
+    erlang:apply(Mod, Fun, [Socket | ExtraArgs]);
+call_loop(Handler, Socket) ->
+    Handler(Socket).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.

src/mochiweb_echo.erl

                                   {loop, {?MODULE, loop}}]).
 
 loop(Socket) ->
-    case gen_tcp:recv(Socket, 0, 30000) of
+    case mochiweb_socket:recv(Socket, 0, 30000) of
         {ok, Data} ->
-            case gen_tcp:send(Socket, Data) of
+            case mochiweb_socket:send(Socket, Data) of
                 ok ->
                     loop(Socket);
                 _ ->

src/mochiweb_html.erl

 -define(IS_LITERAL_SAFE(C),
         ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
          orelse (C >= $0 andalso C =< $9))).
+-define(PROBABLE_CLOSE(C),
+        (C =:= $> orelse ?IS_WHITESPACE(C))).
 
 -record(decoder, {line=1,
                   column=1,
 to_tokens(T={comment, _}) ->
     [T];
 to_tokens({Tag0, Acc}) ->
+    %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
     to_tokens({Tag0, [], Acc});
 to_tokens({Tag0, Attrs, Acc}) ->
     Tag = to_tag(Tag0),
 to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
     %% Allow {comment, iolist()}
     to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+    %% Allow {pi, binary(), list()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
 to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
     %% Allow {p, [{"class", "foo"}]}
     to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
             {{end_tag, Tag}, S2};
         <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) ->
             %% This isn't really strict HTML
-            tokenize_data(B, ?INC_COL(S));
+            {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)),
+            {{data, <<$<, Data/binary>>, false}, S1};
         <<_:O/binary, "<", _/binary>> ->
             {Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
             {Attrs, S2} = tokenize_attributes(B, S1),
     case Bin of
         <<_:O/binary, "?>", _/binary>> ->
             ?ADV_COL(S, 2);
-        <<_:O/binary, C, _/binary>> ->
-            find_qgt(Bin, ?INC_CHAR(S, C));
-        _ ->
+        %% tokenize_attributes takes care of this state:
+        %% <<_:O/binary, C, _/binary>> ->
+        %%     find_qgt(Bin, ?INC_CHAR(S, C));
+        <<_:O/binary>> ->
             S
     end.
 
                            <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
                            R;
                        Unichar ->
-                           list_to_binary(xmerl_ucs:to_utf8(Unichar))
+                           mochiutf8:codepoint_to_bytes(Unichar)
                    end,
             {{data, Data, false}, ?INC_COL(S)};
         _ ->
 
 tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
     case Bin of
-        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
-            {error, {whitespace, [C], S}};
         <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
             tokenize_word(Bin, ?INC_COL(S), C);
-        _ ->
+        <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) ->
+            %% Sanity check for whitespace
             tokenize_literal(Bin, S, [])
     end.
 
 tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>>
+        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>>
         when (SS =:= $s orelse SS =:= $S) andalso
              (CC =:= $c orelse CC =:= $C) andalso
              (RR =:= $r orelse RR =:= $R) andalso
              (II =:= $i orelse II =:= $I) andalso
              (PP =:= $p orelse PP =:= $P) andalso
-             (TT=:= $t orelse TT =:= $T) ->
+             (TT=:= $t orelse TT =:= $T) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
 tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>>
+        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>>
         when (TT =:= $t orelse TT =:= $T) andalso
              (EE =:= $e orelse EE =:= $E) andalso
              (XX =:= $x orelse XX =:= $X) andalso
              (AA =:= $a orelse AA =:= $A) andalso
              (RR =:= $r orelse RR =:= $R) andalso
              (EE2 =:= $e orelse EE2 =:= $E) andalso
-             (AA2 =:= $a orelse AA2 =:= $A) ->
+             (AA2 =:= $a orelse AA2 =:= $A) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
 -ifdef(TEST).
 
 to_html_test() ->
-    Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
-    Expect = iolist_to_binary(
-               to_html({html, [],
-                        [{<<"head">>, [],
-                          [{title, <<"hey!">>}]},
-                         {body, [],
-                          [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
-                           {'div', <<"sucka">>},
-                           {comment, <<" comment! ">>}]}]})),
-    Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
-    Expect1 = iolist_to_binary(
-                to_html({doctype,
-                         [<<"html">>, <<"PUBLIC">>,
-                          <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
-                          <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
+    ?assertEqual(
+       <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>,
+       iolist_to_binary(
+         to_html({html, [],
+                  [{<<"head">>, [],
+                    [{title, <<"hey!">>}]},
+                   {body, [],
+                    [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+                     {'div', <<"sucka">>},
+                     {'=', <<"RAW!">>},
+                     {comment, <<" comment! ">>}]}]}))),
+    ?assertEqual(
+       <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+       iolist_to_binary(
+         to_html({doctype,
+                  [<<"html">>, <<"PUBLIC">>,
+                   <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+                   <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))),
+    ?assertEqual(
+       <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>,
+       iolist_to_binary(
+         to_html({<<"html">>,[],
+                  [{pi, <<"xml:namespace">>,
+                    [{<<"prefix">>,<<"o">>},
+                     {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))),
     ok.
 
 escape_test() ->
-    <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
-        escape(<<"&quot;\"word <<up!&quot;">>),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape('&quot;\"word ><<up!&quot;')),
     ok.
 
 escape_attr_test() ->
-    <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
-        escape_attr(<<"&quot;\"word <<up!&quot;">>),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr('&quot;\"word ><<up!&quot;')),
+    ?assertEqual(
+       <<"12345">>,
+       escape_attr(12345)),
+    ?assertEqual(
+       <<"1.5">>,
+       escape_attr(1.5)),
     ok.
 
 tokens_test() ->
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>),
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>),
-    [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] =
-        tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"textarea">>, [], false},
-     {data, <<"<html></body>">>, false},
-     {end_tag, <<"textarea">>}] =
-        tokens(<<"<textarea><html></body></textarea>">>),
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)),
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)),
+    ?assertEqual(
+       [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}],
+       tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body>">>, false},
+        {end_tag, <<"textarea">>}],
+       tokens(<<"<textarea><html></body></textarea>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body></textareaz>">>, false}],
+       tokens(<<"<textarea ><html></body></textareaz>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office \n?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office">>)),
+    ?assertEqual(
+       [{data, <<"<">>, false}],
+       tokens(<<"&lt;">>)),
+    ?assertEqual(
+       [{data, <<"not html ">>, false},
+        {data, <<"< at all">>, false}],
+       tokens(<<"not html < at all">>)),
     ok.
 
 parse_test() ->
  </head>
  <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
 </html>">>,
-    Expect = {<<"html">>, [],
-              [{<<"head">>, [],
-                [{<<"meta">>,
-                  [{<<"http-equiv">>,<<"Content-Type">>},
-                   {<<"content">>,<<"text/html; charset=UTF-8">>}],
-                  []},
-                 {<<"title">>,[],[<<"Foo">>]},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/foo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"shortcut icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []}]},
-               {<<"body">>,
-                [{<<"id">>,<<"home">>},
-                 {<<"class">>,<<"tundra">>}],
-                [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
-    Expect = parse(D0),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [],
+          [{<<"meta">>,
+            [{<<"http-equiv">>,<<"Content-Type">>},
+             {<<"content">>,<<"text/html; charset=UTF-8">>}],
+            []},
+           {<<"title">>,[],[<<"Foo">>]},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/foo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
+           {<<"link">>,
+            [{<<"rel">>,<<"icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"shortcut icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []}]},
+         {<<"body">>,
+          [{<<"id">>,<<"home">>},
+           {<<"class">>,<<"tundra">>}],
+          [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
+       parse(D0)),
+    ?assertEqual(
+       {<<"html">>,[],
+        [{pi, <<"xml:namespace">>,
+          [{<<"prefix">>,<<"o">>},
+           {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]},
+       parse(
+         <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"dd">>, [], [<<"foo">>]},
+         {<<"dt">>, [], [<<"bar">>]}]},
+       parse(<<"<html><dd>foo<dt>bar</html>">>)),
+    %% Singleton sadness
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], []},
+         <<"foo">>,
+         {<<"br">>, [], []},
+         <<"bar">>]},
+       parse(<<"<html><link>foo<br>bar</html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], [<<"foo">>,
+                           {<<"br">>, [], []},
+                           <<"bar">>]}]},
+       parse(<<"<html><link>foo<br>bar</link></html>">>)),
+    ok.
+
+%% Check is_singleton/1 against every {Input, Expected} pair returned by
+%% mochiweb_cover:clause_lookup_table/2 for this module.
+%% NOTE(review): presumably the table is derived from is_singleton's own
+%% clauses -- confirm in mochiweb_cover.
+exhaustive_is_singleton_test() ->
+    [?assertEqual(Expected, is_singleton(Tag))
+     || {Tag, Expected}
+            <- mochiweb_cover:clause_lookup_table(?MODULE, is_singleton)].
+
+%% Attribute handling on a start tag that is never closed: an entity inside
+%% a quoted value, an attribute without a value, an entity inside an
+%% attribute name, and a value-less attribute at end of input.
+tokenize_attributes_test() ->
+    Html = <<"<foo bar=\"b&quot;az\" wibble taco&copy;=bell quux">>,
+    Expected = {<<"foo">>,
+                [{<<"bar">>, <<"b\"az">>},
+                 {<<"wibble">>, <<"wibble">>},
+                 {<<"taco", 16#c2, 16#a9>>, <<"bell">>},
+                 {<<"quux">>, <<"quux">>}],
+                []},
+    ?assertEqual(Expected, parse(Html)),
     ok.
 
 tokens2_test() ->
     D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
-    Expect = [{start_tag,<<"channel">>,[],false},
-              {start_tag,<<"title">>,[],false},
-              {data,<<"from __future__ import *">>,false},
-              {end_tag,<<"title">>},
-              {start_tag,<<"link">>,[],true},
-              {data,<<"http://bob.pythonmac.org">>,false},
-              {end_tag,<<"link">>},
-              {start_tag,<<"description">>,[],false},
-              {data,<<"Bob's Rants">>,false},
-              {end_tag,<<"description">>},
-              {end_tag,<<"channel">>}],
-    Expect = tokens(D0),
+    ?assertEqual(
+       [{start_tag,<<"channel">>,[],false},
+        {start_tag,<<"title">>,[],false},
+        {data,<<"from __future__ import *">>,false},
+        {end_tag,<<"title">>},
+        {start_tag,<<"link">>,[],true},
+        {data,<<"http://bob.pythonmac.org">>,false},
+        {end_tag,<<"link">>},
+        {start_tag,<<"description">>,[],false},
+        {data,<<"Bob's Rants">>,false},
+        {end_tag,<<"description">>},
+        {end_tag,<<"channel">>}],
+       tokens(D0)),
+    ok.
+
+%% to_tokens/1 on each accepted node shape, as {Expected, Input} pairs
+%% checked in order.
+to_tokens_test() ->
+    Cases =
+        [{[{start_tag, <<"p">>, [{class, 1}], false},
+           {end_tag, <<"p">>}],
+          {p, [{class, 1}], []}},
+         {[{start_tag, <<"p">>, [], false},
+           {end_tag, <<"p">>}],
+          {p}},
+         {[{'=', <<"data">>}],
+          {'=', <<"data">>}},
+         {[{comment, <<"comment">>}],
+          {comment, <<"comment">>}},
+         %% This is only allowed in sub-tags:
+         %% {p, [{"class", "foo"}]} as {p, [{"class", "foo"}], []}
+         %% On the outside it's always treated as follows:
+         %% {p, [], [{"class", "foo"}]} as {p, [], [{"class", "foo"}]}
+         {[{start_tag, <<"html">>, [], false},
+           {start_tag, <<"p">>, [{class, 1}], false},
+           {end_tag, <<"p">>},
+           {end_tag, <<"html">>}],
+          {html, [{p, [{class, 1}]}]}}],
+    lists:foreach(fun ({Expected, Input}) ->
+                          ?assertEqual(Expected, to_tokens(Input))
+                  end,
+                  Cases),
     ok.
 
 parse2_test() ->
     D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
-    Expect = {<<"channel">>,[],
-              [{<<"title">>,[],[<<"from __future__ import *">>]},
-               {<<"link">>,[],[
-                               <<"http://bob.pythonmac.org">>,
-                               {<<"br">>,[],[]},
-                               <<"foo">>]},
-               {<<"description">>,[],[<<"Bob's Rants">>]}]},
-    Expect = parse(D0),
+    ?assertEqual(
+       {<<"channel">>,[],
+        [{<<"title">>,[],[<<"from __future__ import *">>]},
+         {<<"link">>,[],[
+                         <<"http://bob.pythonmac.org">>,
+                         {<<"br">>,[],[]},
+                         <<"foo">>]},
+         {<<"description">>,[],[<<"Bob's Rants">>]}]},
+       parse(D0)),
     ok.
 
 parse_tokens_test() ->
     D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
           {data,<<"\n">>,true},
           {start_tag,<<"html">>,[],false}],
-    {<<"html">>, [], []} = parse_tokens(D0),
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D0)),
     D1 = D0 ++ [{end_tag, <<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D1),
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D1)),
     D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2),
+    ?assertEqual(
+       {<<"html">>, [], [{<<"body">>, [], []}]},
+       parse_tokens(D2)),
     D3 = D0 ++ [{start_tag, <<"head">>, [], false},
                 {end_tag, <<"head">>},
                 {start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3),
+    ?assertEqual(
+       {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]},
+       parse_tokens(D3)),
     D4 = D3 ++ [{data,<<"\n">>,true},
                 {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
                 {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
                 {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
                 {end_tag,<<"div">>},
                 {end_tag,<<"div">>}],
-    {<<"html">>, [],
-     [{<<"head">>, [], []},
-      {<<"body">>, [],
-       [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
-        {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
-       ]}]} = parse_tokens(D4),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [], []},
+         {<<"body">>, [],
+          [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
+           {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
+          ]}]},
+       parse_tokens(D4)),
     D5 = [{start_tag,<<"html">>,[],false},
           {data,<<"\n">>,true},
           {data,<<"boo">>,false},
           {data,<<"hoo">>,false},
           {data,<<"\n">>,true},
           {end_tag,<<"html">>}],
-    {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5),
+    ?assertEqual(
+       {<<"html">>, [], [<<"\nboohoo\n">>]},
+       parse_tokens(D5)),
     D6 = [{start_tag,<<"html">>,[],false},
           {data,<<"\n">>,true},
           {data,<<"\n">>,true},
           {end_tag,<<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D6),
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D6)),
     D7 = [{start_tag,<<"html">>,[],false},
           {start_tag,<<"ul">>,[],false},
           {start_tag,<<"li">>,[],false},
           {data,<<"asdf">>,false},
           {end_tag,<<"ul">>},
           {end_tag,<<"html">>}],
-    {<<"html">>, [],
-     [{<<"ul">>, [],
-       [{<<"li">>, [], [<<"word">>]},
-        {<<"li">>, [], [<<"up">>]},
-        {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"ul">>, [],
+          [{<<"li">>, [], [<<"word">>]},
+           {<<"li">>, [], [<<"up">>]},
+           {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]},
+       parse_tokens(D7)),
     ok.
 
 destack_test() ->

src/mochiweb_http.erl

 -export([after_response/2, reentry/1]).
 -export([parse_range_request/1, range_skip_length/2]).
 
--define(IDLE_TIMEOUT, 30000).
+-define(REQUEST_RECV_TIMEOUT, 300000).   % timeout waiting for request line
+-define(HEADERS_RECV_TIMEOUT, 30000). % timeout waiting for headers
 
 -define(MAX_HEADERS, 1000).
 -define(DEFAULTS, [{name, ?MODULE},
                    {port, 8888}]).
 
-set_default({Prop, Value}, PropList) ->
-    case proplists:is_defined(Prop, PropList) of
-        true ->
-            PropList;
-        false ->
-            [{Prop, Value} | PropList]
-    end.
-
-set_defaults(Defaults, PropList) ->
-    lists:foldl(fun set_default/2, PropList, Defaults).
-
 %% @doc Normalise a caller-supplied option list.
 %% The `loop' entry is required: when it is absent the match on
 %% proplists:lookup/2 below fails. The handler found there is wrapped in a
 %% fun that re-enters this module through a fully qualified ?MODULE:loop/2
 %% call, the original `loop' entry is replaced by that fun, and every key
 %% of ?DEFAULTS that is not already defined is then filled in.
 parse_options(Options) ->
     {loop, HttpLoop} = proplists:lookup(loop, Options),
     Loop = fun (S) ->
                    ?MODULE:loop(S, HttpLoop)
            end,
     Options1 = [{loop, Loop} | proplists:delete(loop, Options)],
-    set_defaults(?DEFAULTS, Options1).
+    mochilists:set_defaults(?DEFAULTS, Options1).
 
 %% @doc Ask mochiweb_socket_server to stop the server identified by ?MODULE
 %% (the same value ?DEFAULTS uses for the `name' option).
 stop() ->
     mochiweb_socket_server:stop(?MODULE).
     default_body(Req, Req:get(method), Req:get(path)).
 
 %% @doc Put Socket into `{packet, http}' mode and hand over to request/2,
 %% which waits for the request line. Body is the request handler that is
 %% later invoked from headers/5.
 loop(Socket, Body) ->
-    inet:setopts(Socket, [{packet, http}]),
+    mochiweb_socket:setopts(Socket, [{packet, http}]),
     request(Socket, Body).
 
 %% @doc Wait up to ?REQUEST_RECV_TIMEOUT for the HTTP request line.
 %% When a request line arrives the socket is switched to `{packet, httph}'
 %% and header collection starts in headers/5 with an empty header list.
 %% A bare "\r\n" or "\n" received instead of a request line is skipped by
 %% calling request/2 again. A closed connection or a timeout closes the
 %% socket and exits the process with reason `normal'; any other result is
 %% answered through handle_invalid_request/1.
 request(Socket, Body) ->
-    case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+    case mochiweb_socket:recv(Socket, 0, ?REQUEST_RECV_TIMEOUT) of
         {ok, {http_request, Method, Path, Version}} ->
+            mochiweb_socket:setopts(Socket, [{packet, httph}]),
             headers(Socket, {Method, Path, Version}, [], Body, 0);
         {error, {http_error, "\r\n"}} ->
             request(Socket, Body);
         {error, {http_error, "\n"}} ->
             request(Socket, Body);
         {error, closed} ->
-            gen_tcp:close(Socket),
+            mochiweb_socket:close(Socket),
             exit(normal);
         {error, timeout} ->
-            gen_tcp:close(Socket),
-            exit(normal);            
+            mochiweb_socket:close(Socket),
+            exit(normal);
         _Other ->
             handle_invalid_request(Socket)
     end.
 
 %% @doc Collect request headers, one recv per header, each recv bounded by
 %% ?HEADERS_RECV_TIMEOUT. Headers are accumulated in reverse arrival order
 %% and the last argument counts how many have been read; once the count
 %% reaches ?MAX_HEADERS the request is rejected as invalid.
 %% At end-of-headers the socket is switched back to `{packet, raw}', the
 %% request object is built with the headers restored to arrival order, the
 %% handler is run through call_body/2 and control passes to
 %% ?MODULE:after_response/2. A closed connection closes the socket and
 %% exits with reason `normal'; every other recv result (a timeout
 %% included, since it has no clause of its own) is treated as an invalid
 %% request.
 headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) ->
     %% Too many headers sent, bad request.
-    inet:setopts(Socket, [{packet, raw}]),
+    mochiweb_socket:setopts(Socket, [{packet, raw}]),
     handle_invalid_request(Socket, Request, Headers);
 headers(Socket, Request, Headers, Body, HeaderCount) ->
-    case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+    case mochiweb_socket:recv(Socket, 0, ?HEADERS_RECV_TIMEOUT) of
         {ok, http_eoh} ->
-            inet:setopts(Socket, [{packet, raw}]),
+            mochiweb_socket:setopts(Socket, [{packet, raw}]),
             Req = mochiweb:new_request({Socket, Request,
                                         lists:reverse(Headers)}),
-            Body(Req),
+            call_body(Body, Req),
             ?MODULE:after_response(Body, Req);
         {ok, {http_header, _, Name, _, Value}} ->
             headers(Socket, Request, [{Name, Value} | Headers], Body,
                     1 + HeaderCount);
         {error, closed} ->
-            gen_tcp:close(Socket),
+            mochiweb_socket:close(Socket),
             exit(normal);
         _Other ->
             handle_invalid_request(Socket, Request, Headers)
     end.
 
+%% @doc Invoke the request handler with Req. A `{Module, Function}' pair is
+%% called as Module:Function(Req); any other handler is applied to Req
+%% directly, so it is expected to be a fun of arity 1.
+call_body({Mod, Fun}, Req) ->
+    Mod:Fun(Req);
+call_body(Handler, Req) ->
+    Handler(Req).
+
 %% @doc Reject a connection for which no usable request line was read.
 %% Delegates to handle_invalid_request/3 with a placeholder request
 %% ('GET' on "/" with version {0,9}) and an empty header list.
 handle_invalid_request(Socket) ->
     handle_invalid_request(Socket, {'GET', {abs_path, "/"}, {0,9}}, []).
 
 %% @doc Answer an unusable request with a 400 response that has no extra
 %% headers and an empty body, then close the socket and exit the process
 %% with reason `normal'. The socket is put into `{packet, raw}' mode first.
 %% RevHeaders is reversed before the request object is built.
 handle_invalid_request(Socket, Request, RevHeaders) ->
-    inet:setopts(Socket, [{packet, raw}]),
+    mochiweb_socket:setopts(Socket, [{packet, raw}]),
     Req = mochiweb:new_request({Socket, Request,
                                 lists:reverse(RevHeaders)}),
     Req:respond({400, [], []}),
-    gen_tcp:close(Socket),
+    mochiweb_socket:close(Socket),
     exit(normal).
 
 after_response(Body, Req) ->
     Socket = Req:get(socket),
     case Req:should_close() of
         true ->
-            gen_tcp:close(Socket),
+            mochiweb_socket:close(Socket),
             exit(normal);
         false ->
             Req:cleanup(),

src/mochiweb_io.erl

+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for dealing with IO devices (open files).
+
+-module(mochiweb_io).
+-author('bob@mochimedia.com').
+
+-export([iodevice_stream/3, iodevice_stream/2]).
+-export([iodevice_foldl/4, iodevice_foldl/3]).
+-export([iodevice_size/1]).
+-define(READ_SIZE, 8192).
+
+%% @spec iodevice_foldl(Fun::function(), Acc0::term(), IoDevice::term()) -> term()
+%% @doc Fold Fun over the data read from IoDevice, requesting ?READ_SIZE
+%%      per read. Equivalent to iodevice_foldl/4 with that buffer size.
+iodevice_foldl(Fun, Acc0, IoDevice) ->
+    iodevice_foldl(Fun, Acc0, IoDevice, ?READ_SIZE).
+
+%% @spec iodevice_foldl(Fun::function(), Acc0::term(), IoDevice::term(),
+%%                      BufferSize::integer()) -> term()
+%% @doc Read IoDevice with file:read/2, asking for BufferSize at a time,
+%%      and thread an accumulator through Fun(Data, Acc) for every piece
+%%      read. Returns the accumulator once file:read/2 reports eof. A read
+%%      error is not handled here and fails the case expression.
+iodevice_foldl(Fun, Acc0, IoDevice, BufferSize) ->
+    case file:read(IoDevice, BufferSize) of
+        {ok, Chunk} ->
+            Acc1 = Fun(Chunk, Acc0),
+            iodevice_foldl(Fun, Acc1, IoDevice, BufferSize);
+        eof ->
+            Acc0
+    end.
+
+%% @spec iodevice_stream(Callback::function(), IoDevice::term()) -> ok
+%% @doc Feed the data read from IoDevice to Callback, requesting
+%%      ?READ_SIZE per read. Equivalent to iodevice_stream/3 with that
+%%      buffer size.
+iodevice_stream(Callback, IoDevice) ->
+    iodevice_stream(Callback, IoDevice, ?READ_SIZE).
+
+%% @spec iodevice_stream(Callback::function(), IoDevice::term(),
+%%                       BufferSize::integer()) -> ok
+%% @doc Call Callback(Data) for every piece read from IoDevice, in order.
+%%      Callback has to return `ok' each time: the fold only accepts `ok'
+%%      as its running accumulator and the final result is matched
+%%      against `ok' as well.
+iodevice_stream(Callback, IoDevice, BufferSize) ->
+    Step = fun (Chunk, ok) -> Callback(Chunk) end,
+    ok = iodevice_foldl(Step, ok, IoDevice, BufferSize).
+
+%% @spec iodevice_size(IoDevice::term()) -> integer()
+%% @doc Return the size of IoDevice, taken as the offset reported when
+%%      seeking to its end. The device is left positioned at the
+%%      beginning afterwards, whatever its position was before the call.
+iodevice_size(IoDevice) ->
+    {ok, EndOffset} = file:position(IoDevice, eof),
+    %% Rewind so that subsequent reads start from the first byte.
+    {ok, 0} = file:position(IoDevice, bof),
+    EndOffset.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+
+
+-endif.

src/mochiweb_multipart.erl

 
 -export([parse_form/1, parse_form/2]).
 -export([parse_multipart_request/2]).
+-export([parts_to_body/3, parts_to_multipart_body/4]).
+-export([default_file_handler/2]).
 
 -define(CHUNKSIZE, 4096).
 
 -record(mp, {state, boundary, length, buffer, callback, req}).
 
 %% TODO: DOCUMENT THIS MODULE.
+%% @type key() = atom() | string() | binary().
+%% @type value() = atom() | iolist() | integer().
+%% @type header() = {key(), value()}.
+%% @type bodypart() = {Start::integer(), End::integer(), Body::iolist()}.
+%% @type formfile() = {Name::string(), ContentType::string(), Content::binary()}.
+%% @type request().
+%% @type file_handler() = (Filename::string(), ContentType::string()) -> file_handler_callback().
+%% @type file_handler_callback() = (binary() | eof) -> file_handler_callback() | term().
 
+%% @spec parts_to_body([bodypart()], ContentType::string(),
+%%                     Size::integer()) -> {[header()], iolist()}
+%% @doc Return {[header()], iolist()} representing the body for the given
+%%      parts, may be a single part or multipart.
+%%      A list holding exactly one part yields that part's body together
+%%      with a Content-Type header and a
+%%      "Content-Range: bytes Start-End/Size" header. Any other list is
+%%      rendered through parts_to_multipart_body/4 with a boundary made of
+%%      8 random bytes, hex-encoded.
+parts_to_body([{Start, End, Body}], ContentType, Size) ->
+    HeaderList = [{"Content-Type", ContentType},
+                  {"Content-Range",
+                   ["bytes ",
+                    mochiweb_util:make_io(Start), "-", mochiweb_util:make_io(End),
+                    "/", mochiweb_util:make_io(Size)]}],
+    {HeaderList, Body};
+parts_to_body(BodyList, ContentType, Size) when is_list(BodyList) ->
+    %% crypto:rand_bytes/1 was deprecated and later removed from OTP, so it
+    %% fails with undef on current runtimes; strong_rand_bytes/1 returns a
+    %% binary of the same length and is the supported replacement.
+    Boundary = mochihex:to_hex(crypto:strong_rand_bytes(8)),
+    parts_to_multipart_body(BodyList, ContentType, Size, Boundary).
+
+%% @spec parts_to_multipart_body([bodypart()], ContentType::string(),
+%%                               Size::integer(), Boundary::string()) ->
+%%           {[header()], iolist()}
+%% @doc Build a multipart/byteranges response for the given parts. The
+%%      result is {[header()], iolist()}: a single Content-Type header
+%%      that announces Boundary, and the body produced by
+%%      multipart_body/4. The output is multipart even for a single part.
+parts_to_multipart_body(BodyList, ContentType, Size, Boundary) ->
+    Headers = [{"Content-Type",
+                ["multipart/byteranges; ", "boundary=", Boundary]}],
+    {Headers, multipart_body(BodyList, ContentType, Boundary, Size)}.
+
+%% @spec multipart_body([bodypart()], ContentType::string(),
+%%                      Boundary::string(), Size::integer()) -> iolist()
+%% @doc Return the representation of a multipart body for the given [bodypart()].
+multipart_body([], _ContentType, Boundary, _Size) ->
+    ["--", Boundary, "--\r\n"];