David Powers avatar David Powers committed a6d5a35

initial import

Comments (0)

Files changed (18)

+open Core.Std
+
+type t =
+  {
+    max_value : int;
+    set       : int array;
+  }
+
+let create ~max_value =
+  {
+    max_value;
+    set  = Array.create ~len:(max_value + 1 / 8) 0;
+  }
+;;
+
+let single_bit_vals =
+  let arr = Array.create Sys.word_size in
+  for i = 0 to 63 do
+    arr.(i) <- base lsl i;
+  done;
+  (fun i -> arr.(i))
+;;
+
+let slot_and_bit v =
+  let slot = v / Sys.word_size in
+  let bit  = v - (slot * Sys.word_size) in
+  (slot, bit)
+;;
+
+let arg_check t v =
+  if t.max_value > v then
+    failwithf "attempt to access %i, which is greater than max_value (%i)"
+      v t.max_value ()
+;;
+
+let add t v =
+  arg_check t v;
+  let slot, bit = slot_and_bit v in
+  t.set.(slot) <- t.set.(slot) lor single_bit_vals bit
+;;
+
+let remove t v =
+  arg_check t v;
+  let slot, bit = slot_and_bit v in
+  t.set.(slot) <- t.set.(slot) land (lnot single_bit_vals bit)
+;;
+
+let mem t v =
+  arg_check t v;
+  let slot, bit = slot_and_bit v in
+  t.set.(slot) land single_bit_vals bit <> 0
+;;
+
+let clear t = Array.iteri t.set ~f:(fun i -> t.set.(i) <- 0)
+type t
+
+val create : size:int -> t
+val add    : t -> int -> unit
+val remove : t -> int -> unit
+val mem    : t -> int -> bool
+val clear  : t -> unit

high/crit-bit/bit_indexable.ml

+module type S = sig
+  type t
+
+  (* return the byte at the given position in the data structure t.  [get] must never
+     raise an exception and must return 0 for all bytes past the end of the structure *)
+  val get : t -> int -> int
+  val length : t -> int
+end
+
+

high/crit-bit/crit_bit_immut.ml

+module List = ListLabels
+
+module type Bit_indexable = sig
+  type t
+
+  (* return the byte at the given position in the data structure t.  [get] must never
+     raise an exception and must return 0 for all bytes past the end of the structure *)
+  val get : t -> int -> int
+end
+
+module type S_mutable = sig
+  type key
+  type 'data t
+
+  val find    : 'data t -> key -> 'data option
+  val replace : 'data t -> key:key -> data:'data -> unit
+  val remove  : 'data t -> key -> unit
+  val iter    : 'data t -> f:(key:key -> data:'data -> unit) -> unit
+  val map     : 'data t -> f:('data -> 'b) -> 'b t
+  val fold    : 'data t -> init:'init -> f:(key:key -> data:'data -> 'init -> 'b) -> 'b
+end
+
+module type S_immutable = sig
+  type key
+  type 'data t
+
+  val empty   : 'data t
+  val find    : 'data t -> key -> 'data option
+  val replace : 'data t -> key:key -> data:'data -> 'data t
+  val remove  : 'data t -> key -> 'data t
+  val iter    : 'data t -> f:(key:key -> data:'data -> unit) -> unit
+  val map     : 'data t -> f:('data -> 'b) -> 'b t
+  val fold    : 'data t -> init:'b -> f:(key:key -> data:'data -> 'b -> 'b) -> 'b
+end
+
+module Crit_bit_immutable (B : Bit_indexable) : S_immutable = struct
+  let get = B.get
+
+  module Node = struct
+    module Inner = struct
+      type 'node t = {
+        byte_pos : int;
+        bit_mask : int;
+        left     : 'node;
+        right    : 'node;
+      }
+    end
+
+    type 'a t =
+      | Inner of 'a t Inner.t
+      | Edge of B.t * 'a
+  end
+
+  type key = B.t
+  type 'a t = 'a Node.t option
+
+  let empty = None
+
+  (* returns 0 or 1, indicating direction *)
+  (*let dir_test ~bit_mask c = ((c lor bit_mask) + 1) lsr 8*)
+  let go_left ~bit_mask c = c land bit_mask = 0
+
+  let rec best_mem t s =
+    match t with
+    | Node.Edge (k,v) -> (k,v)
+    | Node.Inner node ->
+      if get s node.Node.Inner.byte_pos land node.Node.Inner.bit_mask = 0 then
+        best_mem node.Node.Inner.left s
+      else
+        best_mem node.Node.Inner.right s
+  ;;
+
+  let find t k =
+    match t with
+    | None   -> None
+    | Some t ->
+      let (k',v) = best_mem t k in
+      if k = k' then Some v else None
+  ;;
+
+  let mem t s =
+    match find t s with
+    | None   -> false
+    | Some _ -> true
+  ;;
+
+  let rec find_most_significant_bit i =
+    if i land      0b10000000 > 0 then 0b10000000
+    else if i land 0b01000000 > 0 then 0b01000000
+    else if i land 0b00100000 > 0 then 0b00100000
+    else if i land 0b00010000 > 0 then 0b00010000
+    else if i land 0b00001000 > 0 then 0b00001000
+    else if i land 0b00000100 > 0 then 0b00000100
+    else if i land 0b00000010 > 0 then 0b00000010
+    else 0b00000001
+  ;;
+
+  (* [find_crit_bit] returns a partially filled inner node containing the byte position
+    and bit mask of the crit_bit difference between base and s *)
+  let find_crit_bit ~base ~key ~data =
+    let rec loop byte_pos =
+      let k_byte = get key byte_pos in
+      let b_byte = get base byte_pos in
+      if b_byte <> k_byte then
+        let bit_mask = find_most_significant_bit (b_byte lxor k_byte) in
+        let edge = Node.Edge (key, data) in
+        let make_node n =
+          Node.Inner
+            (if go_left ~bit_mask b_byte then
+              {Node.Inner.
+                byte_pos;
+                bit_mask;
+                left  = n;
+                right = edge }
+            else
+              {Node.Inner.
+                byte_pos;
+                bit_mask;
+                left  = edge;
+                right = n })
+        in
+        (byte_pos, bit_mask, make_node)
+      else loop (byte_pos + 1)
+    in
+    loop 0
+  ;;
+
+  let replace t ~key ~data =
+    match t with
+    | None -> Some (Node.Edge (key, data))
+    | Some node ->
+      let base,_ = best_mem node key in
+      if base = key then t
+      else begin
+        let byte_pos, bit_mask, make_node = find_crit_bit ~base ~key ~data in
+        let rec loop node =
+          match node with
+          | Node.Edge _ -> make_node node
+          | Node.Inner inner ->
+            let next_byte_pos = inner.Node.Inner.byte_pos in
+            let next_bit_mask = inner.Node.Inner.bit_mask in
+            if   (byte_pos > next_byte_pos)
+              || (next_byte_pos = byte_pos && next_bit_mask > bit_mask)
+            then begin
+              if
+                go_left inner.Node.Inner.bit_mask (get key inner.Node.Inner.byte_pos)
+              then
+                Node.Inner {inner with Node.Inner.left = loop inner.Node.Inner.left}
+              else
+                Node.Inner {inner with Node.Inner.right = loop inner.Node.Inner.right}
+            end else
+              make_node node
+        in
+        Some (loop node)
+      end
+  ;;
+
+  let remove t key =
+    match t with
+    | None      -> None
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (k,_)  ->
+          if k = key then None
+          else t
+        | Node.Inner inner ->
+          if go_left inner.Node.Inner.bit_mask (get key inner.Node.Inner.byte_pos) then
+            begin match loop inner.Node.Inner.left with
+            | None   -> Some inner.Node.Inner.right
+            | Some n -> Some (Node.Inner {inner with Node.Inner.left = n})
+            end
+          else
+            begin match loop inner.Node.Inner.right with
+            | None   -> Some inner.Node.Inner.left
+            | Some n -> Some (Node.Inner {inner with Node.Inner.right = n})
+            end
+      in
+      loop node
+  ;;
+
+  let fold t ~init ~f =
+    match t with
+    | None -> init
+    | Some node ->
+      let rec loop acc node =
+        match node with
+        | Node.Edge (key,data) -> f ~key ~data init
+        | Node.Inner inner     ->
+          loop (loop acc inner.Node.Inner.left) inner.Node.Inner.right
+      in
+      loop init node
+  ;;
+
+  let iter t ~f = fold t ~init:() ~f:(fun ~key ~data () -> f ~key ~data)
+
+  let map t ~f =
+    match t with
+    | None -> None
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (key,data) -> Node.Edge (key, f data)
+        | Node.Inner inner ->
+          let left  = loop inner.Node.Inner.left in
+          let right = loop inner.Node.Inner.right in
+          Node.Inner {Node.Inner.
+            byte_pos = inner.Node.Inner.byte_pos;
+            bit_mask = inner.Node.Inner.bit_mask;
+            left;
+            right;
+          }
+      in
+      Some (loop node)
+  ;;
+end
+
+module String_crit_bit_immutable = Crit_bit_immutable (struct
+  type t = string
+
+  let get s pos =
+    try
+      int_of_char s.[pos]
+    with
+    | _ -> 0
+  ;;
+end)
+
+module Int_crit_bit_immutable = Crit_bit_immutable (struct
+  type t = int
+
+  let get i pos =
+    if pos = 0 then i
+    else 0
+  ;;
+end)
+
+(*module Shape = struct
+  type t = {
+    max_depth : int;
+    leaves : int;
+  }
+
+  let create tree =
+    match tree with
+    | None      -> { max_depth = 0; leaves = 0 }
+    | Some tree ->
+      let rec loop acc depth node =
+        match node with
+        | Node.Edge _ ->
+          {max_depth = max acc.max_depth depth; leaves = acc.leaves + 1}
+        | Node.Inner inner ->
+          let depth = depth + 1 in
+          let acc = loop acc depth inner.Node.Inner.left in
+          loop acc depth inner.Node.Inner.right
+      in
+      loop { max_depth = 0; leaves = 0 } 1 tree
+  ;;
+end
+
+*)

high/crit-bit/immutable.ml

+module List = ListLabels
+
+module type S = sig
+  type key
+  type 'data t
+
+  val empty   : 'data t
+  val find    : 'data t -> key -> 'data option
+  val replace : 'data t -> key:key -> data:'data -> 'data t
+  val remove  : 'data t -> key -> 'data t
+  val iter    : 'data t -> f:(key:key -> data:'data -> unit) -> unit
+  val map     : 'data t -> f:('data -> 'b) -> 'b t
+  val fold    : 'data t -> init:'b -> f:(key:key -> data:'data -> 'b -> 'b) -> 'b
+end
+
+module Crit_bit (B : Bit_indexable.S) : S = struct
+  let get = B.get
+
+  module Node = struct
+    module Inner = struct
+      type 'node t = {
+        byte_pos : int;
+        bit_mask : int;
+        left     : 'node;
+        right    : 'node;
+      }
+    end
+
+    type 'a t =
+      | Inner of 'a t Inner.t
+      | Edge of B.t * 'a
+  end
+
+  type key = B.t
+  type 'a t = 'a Node.t option
+
+  let empty = None
+
+  let rec best_mem t s =
+    match t with
+    | Node.Edge (k,v) -> (k,v)
+    | Node.Inner node ->
+      if get s node.Node.Inner.byte_pos land node.Node.Inner.bit_mask = 0 then
+        best_mem node.Node.Inner.left s
+      else
+        best_mem node.Node.Inner.right s
+  ;;
+
+  let find t k = Shared.find best_mem t k
+  let mem t k = Shared.mem best_mem t k
+
+  (* [find_crit_bit] returns a partially filled inner node containing the byte position
+    and bit mask of the crit_bit difference between base and s *)
+  let find_crit_bit ~base ~key ~data =
+    let b_length = B.length base in
+    let k_length = B.length key in
+    let rec loop byte_pos =
+      if byte_pos = b_length && byte_pos = k_length then None
+      else begin
+        let k_byte = get key byte_pos in
+        let b_byte = get base byte_pos in
+        if b_byte <> k_byte then
+          let bit_mask = Shared.find_most_significant_bit (b_byte lxor k_byte) in
+          let edge = Node.Edge (key, data) in
+          let make_node n =
+            Node.Inner
+              (if go_left ~bit_mask b_byte then
+                {Node.Inner.
+                  byte_pos;
+                  bit_mask;
+                  left  = n;
+                  right = edge }
+              else
+                {Node.Inner.
+                  byte_pos;
+                  bit_mask;
+                  left  = edge;
+                  right = n })
+          in
+          Some (byte_pos, bit_mask, make_node)
+        else loop (byte_pos + 1)
+      end
+    in
+    loop 0
+  ;;
+
+  let replace t ~key ~data =
+    match t with
+    | None -> Some (Node.Edge (key, data))
+    | Some node ->
+      let base,_ = best_mem node key in
+      match find_crit_bit ~base ~key ~data with
+      | None -> t
+      | Some byte_pos, bit_mask, make_node ->
+        let rec loop node =
+          match node with
+          | Node.Edge _ -> make_node node
+          | Node.Inner inner ->
+            let next_byte_pos = inner.Node.Inner.byte_pos in
+            let next_bit_mask = inner.Node.Inner.bit_mask in
+            if   (byte_pos > next_byte_pos)
+              || (next_byte_pos = byte_pos && next_bit_mask > bit_mask)
+            then begin
+              if
+                go_left inner.Node.Inner.bit_mask (get key inner.Node.Inner.byte_pos)
+              then
+                Node.Inner {inner with Node.Inner.left = loop inner.Node.Inner.left}
+              else
+                Node.Inner {inner with Node.Inner.right = loop inner.Node.Inner.right}
+            end else
+              make_node node
+        in
+        Some (loop node)
+  ;;
+
+  let remove t key =
+    match t with
+    | None      -> None
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (k,_)  ->
+          if k = key then None
+          else t
+        | Node.Inner inner ->
+          if go_left inner.Node.Inner.bit_mask (get key inner.Node.Inner.byte_pos) then
+            begin match loop inner.Node.Inner.left with
+            | None   -> Some inner.Node.Inner.right
+            | Some n -> Some (Node.Inner {inner with Node.Inner.left = n})
+            end
+          else
+            begin match loop inner.Node.Inner.right with
+            | None   -> Some inner.Node.Inner.left
+            | Some n -> Some (Node.Inner {inner with Node.Inner.right = n})
+            end
+      in
+      loop node
+  ;;
+
+  let fold t ~init ~f =
+    match t with
+    | None -> init
+    | Some node ->
+      let rec loop acc node =
+        match node with
+        | Node.Edge (key,data) -> f ~key ~data init
+        | Node.Inner inner     ->
+          loop (loop acc inner.Node.Inner.left) inner.Node.Inner.right
+      in
+      loop init node
+  ;;
+
+  let iter t ~f = Shared.iter fold t ~f
+
+  let map t ~f =
+    match t with
+    | None -> None
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (key,data) -> Node.Edge (key, f data)
+        | Node.Inner inner ->
+          let left  = loop inner.Node.Inner.left in
+          let right = loop inner.Node.Inner.right in
+          Node.Inner {Node.Inner.
+            byte_pos = inner.Node.Inner.byte_pos;
+            bit_mask = inner.Node.Inner.bit_mask;
+            left;
+            right;
+          }
+      in
+      Some (loop node)
+  ;;
+end
+
+module String = Crit_bit (struct
+  type t = string
+
+  let get s pos =
+    try
+      int_of_char s.[pos]
+    with
+    | _ -> 0
+  ;;
+end)
+
+module Int = Crit_bit (struct
+  type t = int
+
+  let get i pos =
+    if pos = 0 then i
+    else 0
+  ;;
+end)
+
+(*module Shape = struct
+  type t = {
+    max_depth : int;
+    leaves : int;
+  }
+
+  let create tree =
+    match tree with
+    | None      -> { max_depth = 0; leaves = 0 }
+    | Some tree ->
+      let rec loop acc depth node =
+        match node with
+        | Node.Edge _ ->
+          {max_depth = max acc.max_depth depth; leaves = acc.leaves + 1}
+        | Node.Inner inner ->
+          let depth = depth + 1 in
+          let acc = loop acc depth inner.Node.Inner.left in
+          loop acc depth inner.Node.Inner.right
+      in
+      loop { max_depth = 0; leaves = 0 } 1 tree
+  ;;
+end
+
+*)

high/crit-bit/mutable.ml

+module type S = sig
+  type key
+  type 'data t
+
+  val find    : 'data t -> key -> 'data option
+  val replace : 'data t -> key:key -> data:'data -> unit
+  val remove  : 'data t -> key -> unit
+  val iter    : 'data t -> f:(key:key -> data:'data -> unit) -> unit
+  val map     : 'data t -> f:('data -> 'b) -> 'b t
+  val fold    : 'data t -> init:'b -> f:(key:key -> data:'data -> 'b -> 'b) -> 'b
+end
+
+module Crit_bit (B : Bit_indexable.S) : S = struct
+  module Node = struct
+    module Inner = struct
+      type 'node t = {
+        byte_pos : int;
+        bit_mask : int;
+        mutable left  : 'node;
+        mutable right : 'node;
+      }
+    end
+
+    type 'a t =
+      | Inner of 'a t Inner.t
+      | Edge of B.t * 'a
+  end
+
+  type key = B.t
+  type 'a t = 'a Node.t option ref
+
+  let create () = ref None
+
+  let rec best_mem t s =
+    match t with
+    | Node.Edge (k,v) -> (k,v)
+    | Node.Inner node ->
+      if B.get s node.Node.Inner.byte_pos land node.Node.Inner.bit_mask = 0 then
+        best_mem node.Node.Inner.left s
+      else
+        best_mem node.Node.Inner.right s
+  ;;
+
+  let find t k = Shared.find best_mem !t k
+  let mem t k = Shared.mem best_mem !t k
+
+  (* [find_crit_bit] returns a partially filled inner node containing the byte position
+    and bit mask of the crit_bit difference between base and s *)
+  let find_crit_bit ~base ~key ~data =
+    let b_length = B.length base in
+    let k_length = B.length key in
+    let rec loop byte_pos =
+      if byte_pos = b_length && byte_pos = k_length then None
+      else begin
+        let k_byte = B.get key byte_pos in
+        let b_byte = B.get base byte_pos in
+        if b_byte <> k_byte then
+          let bit_mask = Shared.find_most_significant_bit (b_byte lxor k_byte) in
+          let edge = Node.Edge (key, data) in
+          let make_node n =
+            Node.Inner
+              (if Shared.go_left ~bit_mask b_byte then
+                {Node.Inner.
+                  byte_pos;
+                  bit_mask;
+                  left  = n;
+                  right = edge }
+              else
+                {Node.Inner.
+                  byte_pos;
+                  bit_mask;
+                  left  = edge;
+                  right = n })
+          in
+          Some (byte_pos, bit_mask, make_node)
+        else loop (byte_pos + 1)
+      end
+    in
+    loop 0
+  ;;
+
+  exception Break
+
+  let replace t ~key ~data =
+    match !t with
+    | None -> t := Some (Node.Edge (key, data))
+    | Some node ->
+      let base,_ = best_mem node key in
+      match find_crit_bit ~base ~key ~data with
+      | None -> ()
+      | Some (byte_pos, bit_mask, make_node) ->
+        let rec loop node =
+          match node with
+          | Node.Edge _ -> make_node node
+          | Node.Inner inner ->
+            let next_byte_pos = inner.Node.Inner.byte_pos in
+            let next_bit_mask = inner.Node.Inner.bit_mask in
+            if   (byte_pos > next_byte_pos)
+              || (next_byte_pos = byte_pos && next_bit_mask > bit_mask)
+            then begin
+              if
+                Shared.go_left inner.Node.Inner.bit_mask
+                  (B.get key inner.Node.Inner.byte_pos)
+              then
+                inner.Node.Inner.left <- loop inner.Node.Inner.left
+              else
+                inner.Node.Inner.right <- loop inner.Node.Inner.right;
+              raise Break
+            end else
+              make_node node
+        in
+        try
+          t := Some (loop node)
+        with
+        | Break -> ()
+  ;;
+
+  let remove t key =
+    match !t with
+    | None      -> ()
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (k,_)  ->
+          if k = key then None
+          else raise Break
+        | Node.Inner inner ->
+          if Shared.go_left inner.Node.Inner.bit_mask
+            (B.get key inner.Node.Inner.byte_pos)
+          then
+            begin match loop inner.Node.Inner.left with
+            | None   -> Some inner.Node.Inner.right
+            | Some n ->
+              inner.Node.Inner.left <- n;
+              raise Break
+            end
+          else
+            begin match loop inner.Node.Inner.right with
+            | None   -> Some inner.Node.Inner.left
+            | Some n ->
+              inner.Node.Inner.right <- n;
+              raise Break
+            end
+      in
+      try
+        match loop node with
+        | None   -> t := None
+        | Some n -> t := Some n
+      with
+      | Break -> ()
+  ;;
+
+  let fold t ~init ~f =
+    match !t with
+    | None -> init
+    | Some node ->
+      let rec loop acc node =
+        match node with
+        | Node.Edge (key,data) -> f ~key ~data init
+        | Node.Inner inner     ->
+          loop (loop acc inner.Node.Inner.left) inner.Node.Inner.right
+      in
+      loop init node
+  ;;
+
+  let iter t ~f = Shared.iter fold t ~f
+
+  let map t ~f =
+    match !t with
+    | None -> ref None
+    | Some node ->
+      let rec loop node =
+        match node with
+        | Node.Edge (key,data) -> Node.Edge (key, f data)
+        | Node.Inner inner ->
+          let left  = loop inner.Node.Inner.left in
+          let right = loop inner.Node.Inner.right in
+          Node.Inner {Node.Inner.
+            byte_pos = inner.Node.Inner.byte_pos;
+            bit_mask = inner.Node.Inner.bit_mask;
+            left;
+            right;
+          }
+      in
+      ref (Some (loop node))
+  ;;
+end
+
+module String = Crit_bit (struct
+  type t = string
+
+  let get s pos =
+    try
+      int_of_char s.[pos]
+    with
+    | _ -> 0
+  ;;
+end)
+
+module Int = Crit_bit (struct
+  type t = int
+
+  let get i pos =
+    if pos = 0 then i
+    else 0
+  ;;
+end)
+

high/crit-bit/shared.ml

+(* returns 0 or 1, indicating direction *)
+(*let dir_test ~bit_mask c = ((c lor bit_mask) + 1) lsr 8*)
+let go_left ~bit_mask c = c land bit_mask = 0
+
+let find best_mem t k =
+  match t with
+  | None   -> None
+  | Some t ->
+    let (k',v) = best_mem t k in
+    if k = k' then Some v else None
+;;
+
+let mem best_mem t s =
+  match find best_mem t s with
+  | None   -> false
+  | Some _ -> true
+;;
+
+let rec find_most_significant_bit i =
+  if i land      0b10000000 > 0 then 0b10000000
+  else if i land 0b01000000 > 0 then 0b01000000
+  else if i land 0b00100000 > 0 then 0b00100000
+  else if i land 0b00010000 > 0 then 0b00010000
+  else if i land 0b00001000 > 0 then 0b00001000
+  else if i land 0b00000100 > 0 then 0b00000100
+  else if i land 0b00000010 > 0 then 0b00000010
+  else 0b00000001
+;;
+
+let iter fold t ~f = fold t ~init:() ~f:(fun ~key ~data () -> f ~key ~data)
+
+

high/crit-bit/table.ml

+module Poly = struct
+  type ('key, 'data) t = ('key * 'data) list Mutable.Int.t
+
+  let create () = Mutable.Int.create ()
+
+  let replace t ~key ~data =
+    let r =
+      match Mutable.Int.find t key with
+      | None   -> [key,data]
+      | Some l -> List.Assoc.replace l ~key ~data
+    in
+    Mutable.Int.replace t ~key ~data:r
+  ;;
+
+  let remove t key =
+    match Mutable.Int.find t key with
+    | None -> ()
+    | Some l ->
+      match List.Assoc.remove l ~key ~data with
+      | None -> Mutable.Int.remove t key
+      | Some l -> Mutable.Int.replace t ~key ~data:r
+  ;;
+
+  let find t key =
+    match Mutable.Int.find t key with
+    | None -> None
+    | Some l -> List.Assoc.find l key
+  ;;
+end
+let swap arr i j =
+  let swap = arr.(i) in
+  arr.(i) <- arr.(j);
+  arr.(j) <- swap;
+;;
+
+(** [median_split] splits the given array around the median, with the median ending up in
+    start_pos + ((end_pos - start_pos + 1) / 2) position (the middle of the given range).
+    Range is assumed to be the entire array if it is not given.  All values at index >=
+    start_pos and index < median_pos are guaranteed to be < the value at median pos,
+    though they are not otherwise sorted.  Similarly, values at indicies >= median_pos <=
+    end_pos will be >= the value at median_pos.
+
+    The position of the median value is returned.
+
+    The algorithm used has an expected runtime of O(n), but a degenerate case of O(n^2),
+    similar to quicksort.
+*)
+let median_split arr ?range ~cmp =
+  let median_pos = Array.length arr / 2 in
+  let rec loop ~start_pos ~end_pos =
+    assert (end_pos >= start_pos);
+
+    (* choose a random pivot element and move it to the end of the slice *)
+    let len = end_pos - start_pos + 1 in
+    swap arr end_pos (start_pos + Random.int len);
+    let pivot = arr.(end_pos) in
+
+    (* walk through the slice from the beginning moving the proposed pivot index forward
+       every time we find an element < pivot, and swapping that element into place *)
+    let new_pivot_index = ref start_pos in
+    for i = start_pos to start_pos + len - 1 do
+      if arr.(i) < pivot then begin
+        swap arr i !new_pivot_index;
+        incr new_pivot_index;
+      end;
+    done;
+
+    (* finally, swap the pivot into it's final resting place and determine if the median
+       is to the left or the right of the new pivot position *)
+    swap arr !new_pivot_index end_pos;
+    if !new_pivot_index = median_pos then
+      median_pos
+    else if !new_pivot_index > median_pos then
+      loop ~start_pos ~end_pos:!new_pivot_index
+    else
+      loop ~start_pos:!new_pivot_index ~end_pos
+  in
+  let start_pos,end_pos =
+    match range with
+    | None -> 0, Array.length arr - 1
+    | Some (s,e) -> s,e
+  in
+  loop ~start_pos ~end_pos
+;;
+
+(* representation of a k dimentional point.  [dimensions] must return the number of
+   dimensions, and [nth] returns the value at dimention n for a given point.  For example,
+   a basic two-dimentional point would be:
+
+     module Point_2d : Point = struct
+       type t = (float * float)
+
+       let dimensions = 2
+
+       let nth t n =
+         match n with
+         | 0 -> fst t
+         | 1 -> snd t
+         | _ -> failwithf "unexpected dimension %i accessed" n ()
+
+       let distance t1 t2 =
+         (* pythagorean distance *)
+      end
+*)
+module type Point = sig
+  type t
+
+  (** [dimensions] the number of dimensions this point has *)
+  val dimensions : int
+
+  (** [nth] given a t and a dimension, returns the coordinates at that dimension *)
+  val nth : t -> int -> float
+
+  (** [distance] returns the distance between two points *)
+  val distance : t -> t -> float
+end
+
+module Make (Point : Point) = struct
+  type t =
+    | Node of Point.t * t * t
+    | Empty
+
+  let create arr =
+    let rec loop dim ~start_pos ~end_pos =
+      if end_pos < start_pos then
+        Empty
+      else begin
+        let dim =
+          if dim >= Point.dimensions then 0
+          else dim
+        in
+        let cmp p1 p2  = compare (Point.nth p1 dim) (Point.nth p2 dim) in
+        let median_pos = median_split arr ~range:(start_pos,end_pos) ~cmp in
+        let left       = loop (dim + 1) ~start_pos ~end_pos:(median_pos - 1) in
+        let right      = loop (dim + 1) ~start_pos:(median_pos + 1) ~end_pos in
+        Node (arr.(median_pos), left, right)
+      end
+    in
+    loop 0 ~start_pos:0 ~end_pos:(Array.length arr - 1)
+  ;;
+
+  let to_list t =
+    let rec loop acc t =
+      match t with
+      | Empty -> acc
+      | Node (point, left, right) ->
+        let acc = loop (point :: acc) left in
+        loop acc right
+    in
+    loop [] t
+  ;;
+
+  let rebalance t = create (Array.of_list (to_list t))
+
+  let float_abs f1 = if f1 < 0.0 then (-1.0) *. f1 else f1
+
+  let find_nearest t point =
+    let rec traverse t dim =
+      let dim =
+        if dim >= Point.dimensions then 0
+        else dim
+      in
+      match t with
+      | Empty -> assert false
+      | Node (current_best, Empty, Empty) ->
+        (current_best, Point.distance current_best point)
+      | Node (current_point, left, right) ->
+        (* first, traverse to get the best point below us in the tree in the likely
+           direction *)
+        let point_nth = Point.nth point dim in
+        let traverse_best, traverse_distance =
+          if point_nth < Point.nth current_point dim then
+            traverse left (dim + 1)
+          else
+            traverse right (dim + 1)
+        in
+        (* then test that against the current point in the tree to see if we are closer *)
+        let current_best, current_distance =
+          let current_distance = Point.distance current_point point in
+          if current_distance < traverse_distance then
+            (current_point, current_distance)
+          else
+            (traverse_best, traverse_distance)
+        in
+        (* now see if we need to go down the other side of the tree by looking at how far
+           the current best point is from our subject point on the current axis.  If the
+           distance on the current axis to the current best point is < the current best
+           distance, then we need to check the other side of the splitting plane
+        *)
+        let axis_distance = float_abs (point_nth -. (Point.nth current_best dim)) in
+        if axis_distance > current_distance then
+          (current_best, current_distance)
+        else begin
+          let other_best, other_distance =
+            (* reverse the direction we traversed before *)
+            if point_nth < Point.nth current_point dim then
+              traverse right (dim + 1)
+            else
+              traverse left (dim + 1)
+          in
+          if other_distance < current_distance then
+            (other_best, other_distance)
+          else
+            (current_best, current_distance)
+        end
+    in
+    traverse t 0
+  ;;
+end
+module List = ListLabels
+
+module String = struct
+  include StringLabels
+
+  let fold t ~init ~f =
+    let last = ref init in
+    iter t ~f:(fun c -> 
+      last := f !last c);
+    !last
+end
+
+module Node = struct
+  type 'a t = {
+    length : int;
+    left   : 'a;
+    right  : 'a;
+  }
+end
+
+type t =
+  | Leaf of string
+  | Node of t Node.t
+
+let create s = Leaf s
+
+let fold_leaves_k t ~init ~f ~k = 
+  let rec traverse t ~f ~acc ~next ~escape =
+    match t with
+    | Node n ->
+      let next ~acc = traverse n.Node.right ~f ~acc ~next ~escape in
+      traverse n.Node.left ~f ~acc ~next ~escape
+    | Leaf s -> f s ~acc ~next ~escape
+  in
+  traverse t ~f ~acc:init ~next:(fun ~acc -> acc) ~escape:k
+
+let length t =
+  match t with
+  | Node n -> n.Node.length
+  | Leaf s -> String.length s
+
+let append t1 t2 = 
+  Node {Node.
+    length = length t1 + length t2;
+    left   = t1;
+    right  = t2 
+  }
+
+let rec fold t ~init ~f =
+  match t with
+  | Leaf s -> String.fold s ~init ~f
+  | Node n ->
+    fold n.Node.right ~init:(fold n.Node.left ~init ~f) ~f
+
+let iter t ~f = fold t ~init:() ~f:(fun () c -> f c)
+
+let iteri t ~f = ignore (fold t ~init:0 ~f:(fun i c -> f i c; i + 1))
+
+let rec sub t ~pos ~len =
+  match t with
+  | Node n ->
+    let left =
+      if pos <= 0 && length n.Node.left <= len then n.Node.left
+      else sub n.Node.left ~pos ~len
+    in
+    let right = 
+      if pos <= length n.Node.left 
+         && pos + len >= n.Node.length
+      then
+        n.Node.right
+      else
+        sub n.Node.right ~pos:(pos - length n.Node.left) ~len:(len - length left)
+    in
+    if length left = 0 then right
+    else if length right = 0 then left
+    else append left right
+  | Leaf s -> 
+    if pos > String.length s then Leaf ""
+    else begin
+      let pos = max pos 0 in
+      Leaf (String.sub s ~pos ~len)
+    end
+
+let to_string t =
+  let s = String.create (length t) in
+  iteri t ~f:(fun i c -> s.[i] <- c);
+  s
+
+let concat l =
+  match l with
+  | [] -> invalid_arg "empty list given to concat"
+  | x :: xs ->
+    List.fold_left xs ~init:x ~f:(fun acc t -> append acc t)
+
+let rec get t i =
+  match t with
+  | Leaf s -> s.[i]
+  | Node n ->
+    let left_length = length n.Node.left in
+    if i >= left_length then get n.Node.right (i - left_length)
+    else get n.Node.left i
+
+let rec set t i c =
+  match t with
+  | Leaf s -> 
+    let s' = String.copy s in
+    s'.[i] <- c;
+    Leaf s'
+  | Node n ->
+    let left_length = length n.Node.left in
+    if i >= left_length then 
+      Node {n with Node.right = set n.Node.right (i - left_length) c}
+    else 
+      Node {n with Node.left = set n.Node.left i c}
+type t
+
+val create : string -> t
+val iter : t -> f:(char -> unit) -> unit
+val fold : t -> init:'a -> f:('a -> char -> 'a) -> 'a
+val append : t -> t -> t
+val concat : t list -> t
+val sub : t -> pos:int -> len:int -> t
+val get : t -> int -> char
+val set : t -> int -> char -> t
+val to_string : t -> string

high/sparse_bit_set.mli

+type t
+
+val create : unit -> t
+val add : t -> int -> unit
+val remove : t -> int -> unit
+val mem : t -> int -> bool
+val clear : t -> unit
+module type Cuckoo_hashable = sig
+  type t
+
+  val equal : t -> t -> bool
+  val hash1 : t -> int
+  val hash2 : t -> int
+end
+
+module type Cuckoo_table_sig = sig
+  type key 
+  type 'data t
+
+  val create  : unit -> 'data t
+  val lookup  : 'data t -> key -> 'data option
+  val replace : 'data t -> key:key -> data:'data -> unit
+  val iter    : 'data t -> (key:key -> data:'data -> unit) -> unit
+  val length  : 'data t -> int
+  val array_length : 'data t -> int
+end
+
+module Cuckoo_table(Key : Cuckoo_hashable) : Cuckoo_table_sig with type key = Key.t 
+= struct
+  type key = Key.t
+
+  module Cell = struct
+    type 'value t =
+      | Empty
+      | Full of Key.t * 'value
+      | Lookup_only of Key.t * 'value
+  end
+  open Cell
+
+  type 'data t = 
+    {
+      mutable table        : 'data Cell.t array;
+      mutable length       : int;
+      mutable lookup_chain : 'a 'b.
+           Key.t 
+        -> (int -> continue:(unit -> 'b) -> 'b)
+        -> continue:(unit -> 'b)
+        -> 'b;
+      mutable insert_chain : 'a 'b.
+           Key.t 
+        -> (int -> continue:(unit -> 'b) -> 'b)
+        -> continue:(unit -> 'b)
+        -> 'b
+    }
+
+  let length t = t.length
+  let array_length t = Array.length t.table
+
+  let cell_size = 8
+
+  let make_chain length k f ~continue =
+    let rec loop hashes =
+      match hashes with
+      | [] -> continue ()
+      | h :: rest ->
+        let h    = h k in
+        let cell = cell_size * (h mod (length / cell_size)) in
+        let rec subloop subcell =
+          if subcell = cell_size then loop rest
+          else begin
+            let continue () = subloop (subcell + 1) in
+            f (cell + subcell) ~continue
+          end
+        in
+        subloop 0
+    in
+    loop [ Key.hash1; Key.hash2 ]
+  ;;
+
+  let create () = 
+    let length = 2 * cell_size in
+    let insert_chain k f = make_chain length k f in
+    {
+      table        = Array.create length Empty;
+      length       = 0;
+      lookup_chain = insert_chain;
+      insert_chain = insert_chain;
+    }
+  ;;
+
+  let lookup_slot t k ~finished ~continue = 
+    t.lookup_chain k 
+      ~continue
+      (fun slot ~continue ->
+        match t.table.(slot) with
+        | Empty       -> continue ()
+        | Lookup_only (k',v) 
+        | Full (k',v) -> 
+          if Key.equal k' k then finished (slot, v)
+          else continue ())
+  ;;    
+
+  let lookup t k = 
+    lookup_slot t k 
+      ~finished:(fun (_,v) -> Some v)
+      ~continue:(fun () -> None)
+  ;;
+
+  let resize t replace =
+    Printf.eprintf "resize called on table with %i elements, %i, load %f\n%!" 
+      t.length 
+      (Array.length t.table) 
+      (float_of_int t.length /. float_of_int (Array.length t.table) *. 100.);
+    let new_length           = Array.length t.table * 2 in
+    let new_table            = Array.create new_length Empty in
+    let new_insert_chain k f = make_chain new_length k f in
+    let lookup_chain k f ~continue = 
+      let continue () = new_insert_chain k f ~continue in
+      t.lookup_chain k f ~continue
+    in
+    let t' = {
+        table = new_table;
+        length = 0;
+        lookup_chain = lookup_chain;
+        insert_chain = new_insert_chain;
+      } 
+    in
+    Array.iteri (fun i _ -> 
+      match t.table.(i) with
+      | Empty | Lookup_only _ -> ()
+      | Full (k,v) -> new_table.(i) <- Lookup_only (k,v)) t.table;
+    Array.iteri (fun i _ -> 
+      match t.table.(i) with
+      | Lookup_only (k, v) -> replace t' ~key:k ~data:v
+      | Empty | Full _ -> ()) t.table;
+    let lookup_chain k f ~continue = 
+      let continue () = new_insert_chain k f ~continue in
+      new_insert_chain k f ~continue
+    in
+    t.table        <- new_table;
+    t.lookup_chain <- lookup_chain;
+    t.insert_chain <- new_insert_chain
+  ;;
+
+  let rec try_move t slot =
+    match t.table.(slot) with
+    | Empty      -> true
+    | Lookup_only (k,v) | Full (k,v) -> 
+      t.insert_chain 
+        ~continue:(fun () -> false)
+        k (fun new_slot ~continue ->
+          match t.table.(new_slot) with
+          | Empty -> 
+            t.table.(new_slot) <- Full (k,v);
+            true
+          | Lookup_only _ | Full _ -> continue ())
+  ;;
+
+  let rec replace t ~key ~data : unit =
+    lookup_slot t key
+      ~finished:(fun (slot,_) -> 
+        t.length       <- t.length - 1;
+        t.table.(slot) <- Empty)
+      ~continue:(fun () -> ());
+    t.insert_chain key (fun slot ~continue ->
+      let insert slot =
+        t.length       <- t.length + 1;
+        t.table.(slot) <- Full (key,data);
+      in
+      match t.table.(slot) with
+      | Empty  -> insert slot
+      | Lookup_only (k,v) ->
+        t.table.(slot) <- Empty;
+        t.length <- t.length - 1;
+        replace t ~key:k ~data:v;
+        if t.table.(slot) = Empty then insert slot
+        else continue ()
+      | Full _ ->
+        if try_move t slot then insert slot
+        else continue ())
+      ~continue:(fun () ->
+        resize t replace; 
+        replace t ~key ~data)
+  ;;
+
+  let iter t f =
+    Array.iter (function
+      | Empty -> ()
+      | Lookup_only (key,data) | Full (key,data) -> f ~key ~data) t.table
+end
+
+module Cuckoo_int = struct
+  type t = int
+
+  let equal (t1:int) (t2:int) = t1 = t2
+
+  (* Knuth's simple hash *)
+  let hash1 v = abs (2654435761 * v)
+
+  (* Jenkins one-at-a-time hash *)
+  let hash2 v =
+    let hash = ref 0 in
+    for i = 0 to 7 do
+      let shift = i * 8 in
+      hash := !hash + (((0xFF lsl shift) land v) lsr shift);
+      hash := !hash + (!hash lsl 10);
+      hash := !hash lxor (!hash lsr 6)
+    done;
+    hash := !hash + (!hash lsl 3);
+    hash := !hash lxor (!hash lsr 11);
+    hash := !hash + (!hash lsl 15);
+    abs !hash
+end
+
+let main () = 
+  Random.init 1234;
+  let module Table = Cuckoo_table(Cuckoo_int) in
+  let h = Table.create () in
+  (*let rh = Hashtbl.create 1 in*)
+  let rec loop n =
+    if n <= 0 then ()
+    else begin
+      let key = Random.int 100_000_000 in
+      Table.replace h ~key ~data:true;
+      (*Hashtbl.replace rh key true;*)
+      (*assert (Some (Hashtbl.find rh key) = Table.lookup h key);*)
+      loop (n - 1)
+    end
+  in
+  loop 10_000_000;
+  (*let rec loop n =
+    if n <= 0 then ()
+    else begin
+      [>ignore (
+        try Hashtbl.find rh (Random.int 100_000_000) with | Not_found -> true);<]
+      ignore (Table.lookup h (Random.int 100_000_000));
+      loop (n - 1)
+    end
+  in
+  loop 10_000_000;*)
+;;
+
+let () = main ()
+
+
+/* http://www.azillionmonkeys.com/qed/hash.html */
+
+#include "stdio.h"
+#include "stdint.h" 
+
+#include "mlvalues.h"
+#include "custom.h"
+#include "memory.h"
+
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+uint32_t SuperFastHash (const char * data, int len) {
+  uint32_t hash = len, tmp;
+  int rem;
+
+  if (len <= 0 || data == NULL) return 0;
+
+  rem = len & 3;
+  len >>= 2;
+
+  /* Main loop */
+  for (;len > 0; len--) {
+      hash  += get16bits (data);
+      tmp    = (get16bits (data+2) << 11) ^ hash;
+      hash   = (hash << 16) ^ tmp;
+      data  += 2*sizeof (uint16_t);
+      hash  += hash >> 11;
+  }
+
+  /* Handle end cases */
+  switch (rem) {
+      case 3: hash += get16bits (data);
+              hash ^= hash << 16;
+              hash ^= data[sizeof (uint16_t)] << 18;
+              hash += hash >> 11;
+              break;
+      case 2: hash += get16bits (data);
+              hash ^= hash << 11;
+              hash += hash >> 17;
+              break;
+      case 1: hash += *data;
+              hash ^= hash << 10;
+              hash += hash >> 1;
+  }
+
+  /* Force "avalanching" of final 127 bits */
+  hash ^= hash << 3;
+  hash += hash >> 5;
+  hash ^= hash << 4;
+  hash += hash >> 17;
+  hash ^= hash << 25;
+  hash += hash >> 6;
+
+  return hash;
+}
+
+/* adds an 8 byte value to data and returns the position of the next empty spot in data */
+static int fast_hash_add(char data[], int pos, uintnat add) {
+  /* CR fix this to handle 32 and 64 bit architectures */
+  data[pos]     = (add >> 56) & 0xFFFF;
+  data[pos + 1] = (add >> 48) & 0xFFFF;
+  data[pos + 2] = (add >> 40) & 0xFFFF;
+  data[pos + 3] = (add >> 32) & 0xFFFF;
+  data[pos + 4] = (add >> 24) & 0xFFFF;
+  data[pos + 5] = (add >> 16) & 0xFFFF;
+  data[pos + 6] = (add >> 8) & 0xFFFF;
+  data[pos + 7] = add & 0xFFFF;
+
+  return (pos + 8);
+}
+
+/* modified form of the traversal function used by caml's own hash.  Adds bytes 
+ * from the traversal of obj to data until all of obj has been traversed, or 
+ * max_length bytes have been added.  data must have room for at least max_length
+ * bytes + 8. */
+static int fast_hash_traverse(value obj, char data[], int pos, int max_length)
+{
+  uintnat l;
+  unsigned char * p;
+  mlsize_t i, j;
+
+  if (pos >= max_length - 1) {
+    return pos;
+  };
+
+  if (Is_long(obj)) {
+    pos = fast_hash_add(data, pos, Long_val(obj));
+    return pos;
+  }
+
+  /* Pointers into the heap are well-structured blocks. So are atoms.
+     We can inspect the block contents. */
+
+  // if (Is_in_value_area (obj)) {
+    switch (Tag_val(obj)) {
+      case String_tag:
+        i = caml_string_length(obj);
+        for (p = &Byte_u(obj, 0); i > 0 && pos < max_length; pos++, i--, p++) {
+          data[pos] = *p;
+        };
+        break;
+      case Double_tag:
+        /* For doubles, we inspect their binary representation, LSB first.
+          The results are consistent among all platforms with IEEE floats. */
+#ifdef ARCH_BIG_ENDIAN
+        for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
+            i > 0 && pos < max_length;
+            pos++, p--, i--)
+#else
+        for (p = &Byte_u(obj, 0), i = sizeof(double);
+            i > 0 && pos < max_length;
+            pos++, p++, i--)
+#endif
+          data[pos] = *p;
+        break;
+      case Double_array_tag:
+        for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
+#ifdef ARCH_BIG_ENDIAN
+        for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
+            i > 0 && pos < max_length;
+            pos++, p--, i--)
+#else
+        for (p = &Byte_u(obj, j), i = sizeof(double);
+            i > 0 && pos < max_length;
+            pos++, p++, i--)
+#endif
+          data[pos] = *p;
+        }
+        break;
+      case Abstract_tag:
+        /* We don't know anything about the contents of the block.
+          Better do nothing. */
+        break;
+      case Infix_tag:
+        pos = fast_hash_traverse(obj - Infix_offset_val(obj), data, pos, max_length);
+        break;
+      case Forward_tag:
+        pos = fast_hash_traverse(Forward_val (obj), data, pos, max_length);
+      case Object_tag:
+        pos = fast_hash_add(data, pos, Oid_val(obj));
+        break;
+      case Custom_tag:
+        /* If no hashing function provided, do nothing */
+        if (Custom_ops_val(obj)->hash != NULL) {
+          pos = fast_hash_add(data, pos, Custom_ops_val(obj)->hash(obj));
+        }
+        break;
+      default:
+        data[pos] = Tag_val(obj);
+        pos++;
+        i = Wosize_val(obj);
+        while (i != 0 && pos <= max_length - 1) {
+          i--;
+          pos = fast_hash_traverse(Field(obj, i), data, pos, max_length);
+        };
+        break;
+    }
+    return pos;
+  // }
+
+  /* Otherwise, obj is a pointer outside the heap, to an object with
+     a priori unknown structure. Use its physical address as hash key. */
+  //return (fast_hash_add (data, pos, (intnat) obj)); 
+}
+
+value fast_hash(value obj) {
+  static max_length = 30;
+  char data[max_length + 8];
+  int pos;
+
+  pos = fast_hash_traverse(obj, data, 0, max_length);
+  return Val_int(SuperFastHash(data, pos));
+}
+(* Left leaning red-black trees in OCaml.  Ideas and straight code transliteration have
+   been from:
+
+     Left Leaning Red-Black Trees (Sedgewick):
+       http://www.cs.princeton.edu/~rs/talks/LLRB/LLRB.pdf
+
+     Improvments to insertion pattern matching (Yamamoto, Hirai):
+       http://www.mew.org/~kazu/proj/red-black-tree/
+*)
+
+module List = ListLabels
+
+type 'a t =
+  | Red of 'a t * 'a * 'a t
+  | Black of 'a t * 'a * 'a t
+  | Leaf
+
+let empty = Leaf
+
+let balance_left t =
+  match t with
+  | Black (Red (Red (a, x, b), y, c), z, d) -> Red (Black (a, x, b), y, Black (c, z, d))
+  | _ -> t
+;;
+
+let balance_right t =
+  match t with
+  | Black (Red (a, x, b), y, Red (c, z, d)) -> Red (Black (a, x, b), y, Black (c, z, d))
+  | Red (x, y, Red (c, z, d)) -> Red (Red (x, y, c), z, d)
+  | Black (x, y, Red (c, z, d)) -> Black (Red (x, y, c), z, d)
+  | _ -> t
+;;
+
+let rec insert' t v =
+  match t with
+  | Leaf -> Red (Leaf, v, Leaf)
+  (* these two cases are almost identical, but need to be split out because we use the
+     variant constructor to hold the color. *)
+  | Black (l, v', r) ->
+    let c = compare v v' in
+    if c < 0 then
+      balance_left (Black (insert' l v, v', r))
+    else if c > 0 then
+      balance_right (Black (l, v', insert' r v))
+    else
+      t
+  | Red (l, v', r) ->
+    let c = compare v v' in
+    if c < 0 then
+      balance_left (Red (insert' l v, v', r))
+    else if c > 0 then
+      balance_right (Red (l, v', insert' r v))
+    else
+      t
+
+let insert t v =
+  match insert' t v with
+  | Black _ as t  -> t
+  | Red (l, v, r) -> Black (l, v, r)
+  | Leaf           -> assert false
+;;
+
+let rec find t v =
+  match t with
+  | Leaf -> None
+  | Red (l, v', r)
+  | Black (l, v', r) ->
+    let c = compare v v' in
+    if c < 0 then find l v
+    else if c > 0 then find r v
+    else Some v'
+;;
+
+let flip_color t =
+  match t with
+  | Red (l, v, r) -> Black (l, v, r)
+  | Black (l, v, r) -> Red (l, v, r)
+  | Nil -> assert false
+;;
+
+let delete t v =
+
+  private Node moveRedLeft(Node h)
+{
+   colorFlip(h);
+   if (isRed(h.right.left))
+   {
+      h.right = rotateRight(h.right);
+      h = rotateLeft(h);
+      colorFlip(h);
+}
+return h; }
+private Node moveRedRight(Node h)
+{
+    colorFlip(h);
+    if (isRed(h.left.left))
+    {
+       h = rotateRight(h);
+       colorFlip(h);
+    }
+return h; }
+public void delete(Key key)
+{
+   root = delete(root, key);
+   root.color = BLACK;
+}
+private Node delete(Node h, Key key)
+{
+    if (key.compareTo(h.key) < 0)
+        {
+            if (!isRed(h.left) && !isRed(h.left.left))
+                h = moveRedLeft(h);
+            h.left =  delete(h.left, key);
+        }
+else {
+if (isRed(h.left))
+    h = rotateRight(h);
+if (key.compareTo(h.key) == 0 && (h.right == null))
+    return null;
+if (!isRed(h.right) && !isRed(h.right.left))
+    h = moveRedRight(h);
+if (key.compareTo(h.key) == 0)
+    {
+        h.val = get(h.right, min(h.right).key);
+        h.key = min(h.right).key;
+        h.right = deleteMin(h.right);
+    }
+else h.right = delete(h.right, key);
+}
+    return fixUp(h);
+}
+
+
+let main () =
+  Random.self_init ();
+  let vals = ref [] in
+  let rec loop t n =
+    if n = 0 then t
+    else begin
+      let v = Random.int 1_000_000 in
+      vals := v :: !vals;
+      loop (insert t v) (n - 1)
+    end
+  in
+  let t = loop empty 1_000_000 in
+  List.iter !vals ~f:(fun v ->
+    match find t v with
+    | None -> assert false
+    | Some _ -> ())
+;;
+
+let () = main ()

medium/doubly_linked.ml

+module Doubly_linked = struct
+  type 'a node = {
+    mutable left  : 'a t;
+    mutable right : 'a t;
+    value         : 'a
+  }
+  and type 'a t =
+    | Cons of 'a node
+    | Nil
+
+  let empty = Nil
+
+  let singleton v = Cons {left = Nil; right = Nil; value = v}
+  
+  let cons t v =
+    match t with
+    | Cons {left = left} ->
+      assert (!left = Nil);
+      left := Cons {left = nil, right = t, value = v}
+    | Nil -> singleton v
+
+  let to_list t =
+    let rec loop acc t =
+      match t with
+      | Nil -> List.rev acc
+      | Cons {right = right; value = v} -> loop (v :: acc) right
+    in
+    loop [] t
+
+  let insert t dir v =
+    match t with
+    | Nil -> singleton v
+    | Cons node ->
+      begin match dir with
+      | `right ->
+        let new_node = Cons {left = t; right = right; value = v} in
+        right := node;
+        begin match !right with
+        | Nil -> ()
+        | Cons {left = left} ->
+          assert (left == t);
+          left := node;
+        end
+      | `left ->
+        let node = Cons (ref !left, v, ref t) in
+        left := node;
+        begin match !left with
+        | Nil -> ()
+        | Cons (_, _, right) ->
+          assert (!right == t);
+          right := node
+        end
+      end
+
+  let replace t v =
+    match t with
+    | Nil -> singleton v
+    | Cons (left, _, right) ->
+      let node = Cons (ref !left, v, ref !right) in
+      begin match !left with
+      | Nil -> ()
+      | Cons (_, _, right) -> right := node
+      end;
+      begin match !right with
+      | Nil -> ()
+      | Cons (left, _, _) -> left := node
+      end
+
+  let rec fold t ~init ~f =
+    match t with
+    | Nil -> init
+    | Cons (_, v, right) -> fold !right ~init:(f init v) ~f
+
+  let iter t ~f = fold t ~init:() ~f
+end          

medium/functional_array.ml

+module Array = ArrayLabels
+module List = ListLabels
+
+(*
+module Fixed_array : sig
+  type 'a t
+  val of_array : 'a array -> 'a t
+  val of_array_plus_one : 'a array -> 'a -> 'a t
+  val get : 'a t -> int -> 'a
+  val set : 'a t -> int -> 'a -> 'a t
+end = struct
+  type 'a t = 
+
+  let get (t:'a t) i : 'a =
+    assert (i >= 0 && i <= 31);
+    Obj.obj (Obj.field (Obj.repr t) i)
+  ;;
+
+  let set (t:'a t) i (v:'a) : 'a t =
+    assert (i >= 0 && i <= 31);
+    let t1 = Obj.dup (Obj.repr t) in
+    Obj.set_field t1 i (Obj.repr v);
+    Obj.obj t1
+  ;;
+end
+module FA = Fixed_array
+*)
+
+module Tree = struct
+  type 'a t = 
+    | Leaf of 
+       'a * 'a * 'a * 'a * 'a * 'a * 'a * 'a * 
+       'a * 'a * 'a * 'a * 'a * 'a * 'a * 'a * 
+       'a * 'a * 'a * 'a * 'a * 'a * 'a * 'a * 
+       'a * 'a * 'a * 'a * 'a * 'a * 'a * 'a
+    | Fixed_tree of 
+       'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 
+       'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 
+       'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 
+       'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t * 'a t
+    | Tree of 'a t array
+
+  let leaf_get l i =
+    assert (match l with Leaf _ -> true | _ -> false);
+    assert (i >= 0 && i <= 31);
+    Obj.obj (Obj.field (Obj.repr l) i)
+  ;;
+
+  let fixed_tree_get l i =
+    assert (match l with Fixed_tree _ -> true | _ -> false);
+    assert (i >= 0 && i <= 31);
+    Obj.obj (Obj.field (Obj.repr l) i)
+  ;;
+
+  let leaf_set l i v : 'a t =
+    assert (match l with Leaf _ -> true | _ -> false);
+    assert (i >= 0 && i <= 31);
+    let t1 = Obj.dup (Obj.repr l) in
+    Obj.set_field t1 i (Obj.repr v);
+    Obj.obj t1 
+  ;;
+
+  let fixed_tree_set l i v : 'a t =
+    assert (match l with Fixed_tree _ -> true | _ -> false);
+    assert (i >= 0 && i <= 31);
+    let t1 = Obj.dup (Obj.repr l) in
+    Obj.set_field t1 i (Obj.repr v);
+    Obj.obj t1
+  ;;
+
+  let tree_get t i =
+    Array.get t i
+  ;;
+
+  let tree_set t i v =
+    Array.set t i v
+  ;;
+
+
+  let leaf_of_array arr =
+    assert (Array.length arr = 32);
+    Leaf (arr.(0), arr.(1), arr.(2), arr.(3), arr.(4), arr.(5), arr.(6), arr.(7), 
+          arr.(8), arr.(9), arr.(10), arr.(11), arr.(12), arr.(13), arr.(14), 
+          arr.(15), arr.(16), arr.(17), arr.(18), arr.(19), arr.(20), arr.(21), 
+          arr.(22), arr.(23), arr.(24), arr.(25), arr.(26), arr.(27), arr.(28), 
+          arr.(29), arr.(30), arr.(31))
+  ;;
+
+  let leaf_of_array_plus_one arr v =
+    assert (Array.length arr = 31);
+    Leaf (arr.(0), arr.(1), arr.(2), arr.(3), arr.(4), arr.(5), arr.(6), arr.(7), 
+          arr.(8), arr.(9), arr.(10), arr.(11), arr.(12), arr.(13), arr.(14), 
+          arr.(15), arr.(16), arr.(17), arr.(18), arr.(19), arr.(20), arr.(21), 
+          arr.(22), arr.(23), arr.(24), arr.(25), arr.(26), arr.(27), arr.(28), 
+          arr.(29), arr.(30), v)
+  ;;
+
+  let fixed_tree_of_array arr =
+    assert (Array.length arr = 32);
+    Fixed_tree (arr.(0), arr.(1), arr.(2), arr.(3), arr.(4), arr.(5), arr.(6), arr.(7), 
+            arr.(8), arr.(9), arr.(10), arr.(11), arr.(12), arr.(13), arr.(14), 
+            arr.(15), arr.(16), arr.(17), arr.(18), arr.(19), arr.(20), arr.(21), 
+            arr.(22), arr.(23), arr.(24), arr.(25), arr.(26), arr.(27), arr.(28), 
+            arr.(29), arr.(30), arr.(31))
+  ;;
+
+  let fixed_tree_of_array_plus_one arr v =
+    assert (Array.length arr = 31);
+    Fixed_tree (arr.(0), arr.(1), arr.(2), arr.(3), arr.(4), arr.(5), arr.(6), arr.(7), 
+          arr.(8), arr.(9), arr.(10), arr.(11), arr.(12), arr.(13), arr.(14), 
+          arr.(15), arr.(16), arr.(17), arr.(18), arr.(19), arr.(20), arr.(21), 
+          arr.(22), arr.(23), arr.(24), arr.(25), arr.(26), arr.(27), arr.(28), 
+          arr.(29), arr.(30), v)
+  ;;
+end
+module T = Tree
+
+exception Out_of_bounds
+
+type 'a t = {
+  length      : int;
+  height      : int;
+  tail        : 'a array option;
+  tail_offset : int;
+  root        : 'a Tree.t option
+}
+
+let empty = 
+  {
+    length      = 0;
+    height      = 0;
+    tail        = None;
+    tail_offset = 0;
+    root        = None
+  }
+;;
+
+let array_append arr v : 'a array =
+  let length = Array.length arr in
+  let new_arr = Array.make (length + 1) arr.(0) in
+  Array.blit ~src:arr ~src_pos:0 ~dst:new_arr ~dst_pos:0 ~len:length;
+  new_arr.(length) <- v;
+  assert (not (new_arr.(length) == new_arr.(length - 1)));
+  new_arr
+;;
+
+let array_replace arr pos v : 'a array =
+  let length = Array.length arr in
+  let new_arr = Array.make length arr.(0) in
+  Array.blit ~src:arr ~src_pos:0 ~dst:new_arr ~dst_pos:0 ~len:length;
+  new_arr.(pos) <- v;
+  new_arr
+;;
+
+let get t i =
+  if i < 0 || i >= t.length then raise Out_of_bounds;
+  if i >= t.tail_offset then begin
+    match t.tail with
+    | None -> assert false
+    | Some arr -> arr.(i land 0x01f)
+  end else begin
+    let rec loop tree shift =
+      assert (shift >= 0);
+      match tree with
+      | T.Tree arr -> 
+          loop arr.((i lsr shift) land 0x01f) (shift - 5)
+      | T.Fixed_tree _ as t ->
+          loop (T.fixed_tree_get t ((i lsr shift) land 0x01f)) (shift - 5)
+      | T.Leaf _ as t -> 
+          assert (shift = 0);
+          T.leaf_get t (i land 0x01f)
+    in
+    match t.root with
+    | None -> assert false
+    | Some tree -> loop tree (5 * t.height)
+  end
+;;
+
+let set t i v =
+  if i < 0 || i >= t.length then raise Out_of_bounds;
+  if i >= t.tail_offset then begin
+    match t.tail with
+    | None -> assert false
+    | Some arr -> {t with tail = Some (array_replace arr (i land 0x01f) v)}
+  end else begin
+    let rec loop tree shift =
+      assert (shift >= 0);
+      match tree with
+      | T.Tree arr -> 
+          let index = (i lsr shift) land 0x01f in
+          T.Tree (array_replace arr index (loop arr.(index) (shift - 5)))
+      | T.Fixed_tree _ as t ->
+          let index = (i lsr shift) land 0x01f in
+          T.fixed_tree_set t index (loop (T.fixed_tree_get t index) (shift - 5))
+      | T.Leaf _ as t -> 
+          assert (shift = 0);
+          T.leaf_set t (i land 0x01f) v
+    in
+    match t.root with
+    | None -> assert false
+    | Some tree -> {t with root = Some (loop tree (5 * t.height))}
+  end
+;;
+
+let append t elem =
+  match t.tail with
+  | None -> 
+      {t with 
+        tail = Some [| elem |];
+        length = t.length + 1
+      }
+  | Some tail ->
+      let tail_length = Array.length tail in
+      if tail_length < 31 then begin
+        let new_tail = array_append tail elem in
+        {t with 
+          tail = Some new_tail; 
+          length = t.length + 1
+        }
+      end else begin
+        let new_tail = array_append tail elem in
+        let rec extend_branch height =
+          if height = 0 then T.leaf_of_array new_tail
+          else T.Tree [| extend_branch (height - 1) |]
+        in
+        let rec append_tail current_height tree =
+          match tree with
+          | T.Tree arr -> 
+              let length = Array.length arr in
+              begin match arr.(length - 1) with
+              | T.Leaf _ ->
+                  if length < 31 then 
+                    Some (T.Tree (array_append arr (T.leaf_of_array new_tail)))
+                  else if length = 31 then
+                    Some (T.fixed_tree_of_array_plus_one arr 
+                      (T.leaf_of_array new_tail))
+                  else None
+              | (T.Tree _ | T.Fixed_tree _) as next ->
+                  match append_tail (current_height - 1) next with
+                  | None -> 
+                      if length < 31 then
+                        Some (T.Tree (array_append arr 
+                          (extend_branch (current_height - 1))))
+                      else if length = 31 then
+                        Some (T.fixed_tree_of_array_plus_one arr
+                          (extend_branch (current_height - 1)))
+                      else None
+                  | Some new_tree -> 
+                      Some (T.Tree (array_replace arr (length - 1) new_tree))
+              end
+          | T.Fixed_tree _ as ft ->
+              begin match T.fixed_tree_get ft 31 with 
+              | T.Leaf _ -> None
+              | (T.Tree _ | T.Fixed_tree _) as next ->
+                  match append_tail (current_height - 1) next with
+                  | None -> None
+                  | Some new_tree -> 
+                      Some (T.fixed_tree_set ft 31 new_tree)
+              end
+          | T.Leaf _ -> assert false
+        in
+        match t.root with
+        | None -> 
+            {
+              root = Some (T.Tree [| (T.leaf_of_array new_tail) |]);
+              length = t.length + 1;
+              tail = None;
+              tail_offset = t.tail_offset + 32;
+              height = 1;
+            }
+        | Some tree ->
+            let new_root,new_height =
+              match append_tail t.height tree with
+              | Some nt -> nt, t.height
+              | None -> T.Tree [| tree; extend_branch t.height |], t.height + 1
+            in
+            {
+              root = Some new_root;
+              height = new_height;
+              length = t.length + 1;
+              tail = None;
+              tail_offset = t.tail_offset + 32
+            }
+      end
+;;
+
+let iteri t f =
+  let rec loop n =
+    if n < t.length then begin
+      f (get t n) n;
+      loop (n + 1)
+    end
+  in
+  loop 0
+;;
+
+let iter t f =
+  let f' v _ = f v in
+  iteri t f'
+;;
+
+let of_list l = List.fold_left l ~init:empty ~f:(fun t v -> append t v)
+
+let make_data n =
+  let rec loop n acc =
+    if n > 0 then loop (n - 1) (n :: acc)
+    else acc
+  in
+  loop n []
+;;
+
+let time label f =
+  Gc.compact ();
+  let st = Unix.gettimeofday () in
+  for i = 1 to 10 do
+    f ()
+  done;
+  let et = Unix.gettimeofday () in
+  Printf.printf "%s: %f\n" label (et -. st)
+;;
+
+let test () =
+  let test_size = 1_000_000 in
+  let data = make_data test_size in
+  let t = of_list data in
+  let arr = Array.of_list data in
+  Array.iteri arr ~f:(fun i v -> assert ((get t i) = v));
+  let create_test () = of_list data in
+  let create_base () = Array.of_list data in
+  let iter_test () = iter t (fun _ -> ()) in
+  let iter_base () = Array.iter arr ~f:(fun _ -> ()) in
+  let random_indexes = 
+    let acc = ref [] in
+    for i = 1 to 20_000 do
+      acc := Random.int test_size :: !acc
+    done;
+    !acc
+  in
+  let get_test () = List.iter random_indexes ~f:(fun i -> ignore (get t i)) in
+  let get_base () = List.iter random_indexes ~f:(fun i -> ignore arr.(i)) in
+  let set_test () = 
+    let (_ : int t) = List.fold_left random_indexes ~init:t ~f:(fun t' i -> set t' i 7) in
+    ()
+  in
+  let set_base () =
+    List.iter random_indexes ~f:(fun i -> arr.(i) <- 7)
+  in
+  time "create_test" create_test;
+  time "create_base" create_base;
+  time "iter_test" iter_test;
+  time "iter_base" iter_base;
+  time "get_test" get_test;
+  time "get_base" get_base;
+  time "set_test" set_test;
+  time "set_base" set_base
+;;
+
+let () = test ()

medium/functional_array.mli

+type 'a t
+
+val empty : 'a t
+val get : 'a t -> int -> 'a
+val append : 'a t -> 'a -> 'a t
+val set : 'a t -> int -> 'a -> 'a t
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.