Commits

Sébastien Ferré  committed d3620a6

Initial revision

  • Participants
  • Parent commits fdf2472

Comments (0)

Files changed (1)

File persintset.ml

+
+open Persindex
+
+module Make (Intset2 : Intset.T) =
+  struct
+    let chunk = 1024
+
+    let lazy_val = Lazy.lazy_from_val
+    let force = Lazy.force
+
+    module M (* : Intset.T with type t = (int * Intset2.t Lazy.t) list *) =
+      struct
+	type t = (int * Intset2.t Lazy.t) list
+	      (* sorted in decreasing order of first part *)
+	      (* the second part contains only integers x, s.t. x/chunk = first part *)
+	      (* trying to avoid empty intsets as second part *)
+	      
+	let empty = []
+	    
+	let is_empty e = e = []
+
+	let cardinal e =
+	  List.fold_left
+	    (fun res (p,e2) -> res + Intset2.cardinal (force e2))
+	    0 e
+
+	let mem x =
+	  let x1 = x / chunk in
+	  let rec aux = function
+	    | [] -> false
+	    | (p,e2)::l ->
+		if x1 < p then aux l
+		else if x1 = p then Intset2.mem x (force e2)
+		else (* x1 > p *) false
+	  in
+	  aux
+
+	let singleton x =
+	  let e2 = Intset2.singleton x in
+	  [(x / chunk, lazy_val e2)]
+
+	let add x =
+	  let x1 = x / chunk in
+	  let rec aux pe =
+	    match pe with
+	    | [] -> [(x1,lazy_val (Intset2.singleton x))]
+	    | (pos,e2 as pe2)::l ->
+		if x1 < pos then pe2 :: aux l
+		else if x1 = pos then
+		  (pos,lazy_val (Intset2.add x (force e2)))::l
+		else (* x1 > pos *)
+		  (x1,lazy_val (Intset2.singleton x))::pe
+	  in
+	  aux
+	    
+	let remove x =
+	  let x1 = x / chunk in
+	  let rec aux pe =
+	    match pe with
+	    | [] -> []
+	    | (pos,e2 as pe2)::l ->
+		if x1 < pos then
+		  pe2 :: aux l
+		else if x1 = pos then
+		  let new_e2 = Intset2.remove x (force e2) in
+		  if Intset2.is_empty new_e2
+		  then l
+		  else (pos,lazy_val (new_e2))::l
+		else (* x1 > pos *)
+		  pe
+	  in
+	  aux
+		    
+	let rec subset e f =
+	  match e, f with
+	  | [], _ -> true
+	  | _, [] -> false
+	  | (pos,e2)::l, (pos',e2')::l' ->
+	      if pos > pos' then
+		if Intset2.is_empty (force e2)
+		then subset l f
+		else false
+	      else if pos = pos' then
+		Intset2.subset (force e2) (force e2')
+	      else (* pos < pos' *)
+		subset e l'
+
+	let rec union e f =
+	  match e, f with
+	  | [], _ -> f
+	  | _, [] -> e
+	  | (pos,e2 as pe2)::l, (pos',e2' as pe2')::l' ->
+	      if pos > pos' then pe2::union l f
+	      else if pos = pos' then (pos,lazy_val (Intset2.union (force e2) (force e2')))::union l l'
+	      else (* pos < pos' *) pe2'::union e l'
+
+	let rec inter e f =
+	  match e, f with
+	  | [], _ -> []
+	  | _, [] -> []
+	  | (pos,e2)::l, (pos',e2')::l' ->
+	      if pos > pos' then
+		inter l f
+	      else if pos = pos' then
+		let inter_e2 = Intset2.inter (force e2) (force e2') in
+		if Intset2.is_empty inter_e2
+		then inter l l'
+		else (pos,lazy_val inter_e2)::inter l l'
+	      else (* pos < pos' *)
+		inter e l'
+
+	let rec diff e f =
+	  match e, f with
+	  | [], _ -> []
+	  | _, [] -> e
+	  | (pos,e2 as pe2)::l, (pos',e2')::l' ->
+	      if pos > pos' then
+		pe2::diff l f
+	      else if pos = pos' then
+		let new_e2 = Intset2.diff (force e2) (force e2') in
+		if Intset2.is_empty new_e2
+		then diff l l'
+		else (pos,lazy_val new_e2)::diff l l'
+	      else (* pos < pos' *)
+		diff e l'
+
+	let union_r le =
+	  List.fold_left union empty le
+
+	let inter_r = function
+	  | [] -> invalid_arg "inter_r : empty list of sets"
+	  | e::le -> List.fold_left inter e le
+
+	let rec fold f acc = function
+	  | [] -> acc
+	  | (pos,e2)::l ->
+	      fold f (Intset2.fold f acc (force e2)) l
+
+	let map f =
+	  let rec aux acc = function
+	    | [] -> acc
+	    | (pos,e2)::l ->
+		aux (Intset2.fold (fun res x -> f x::res) acc (force e2)) l
+	  in
+	  aux []
+
+	let rec iter f = function
+	  | [] -> ()
+	  | (pos,e2)::l ->
+	      Intset2.iter f (force e2);
+	      iter f l
+
+	let rec filter f = function
+	  | [] -> []
+	  | (pos,e2)::l ->
+	      let new_e2 = Intset2.filter f (force e2) in
+	      if Intset2.is_empty new_e2
+	      then filter f l
+	      else (pos,lazy_val new_e2)::filter f l
+
+	let rec elements = function
+	  | [] -> LSet.empty ()
+	  | (pos,e2)::l ->
+	      LSet.union (Intset2.elements (force e2)) (elements l)
+
+      end
+
+
+    type extid = int
+
+    type t = (int * extid) list
+
+    let empty : t = []
+
+    class c (name : string) (db : database) =
+      object (self)
+	val genid = new Genid.genid (name ^ ".genid") db
+	val id2ext : (extid,Intset2.t) index = new varray_vector_opt 13 chunk (fun id -> Intset2.empty) db
+
+	initializer
+	  id2ext # locate (name ^ "_extid") name "id2ext"
+
+	method sync =
+	  genid # sync;
+	  id2ext # sync
+
+	method unload (p : int) =
+	  id2ext # unload p
+
+	method is_empty : t -> bool =
+	  fun e ->
+	    e = []
+
+	method mem : int -> t -> bool =
+	  fun x ->
+	    let x1 = x / chunk in
+	    let rec aux = function
+	      | [] -> false
+	      | (p,id)::l ->
+		  if x1 < p then aux l
+		  else if x1 = p then Intset2.mem x (id2ext # get id)
+		  else (* x1 > p *) false
+	    in
+	    aux
+
+	method add : int -> t -> t =
+	  fun x ->
+	    let x1 = x / chunk in
+	    let rec aux pe =
+	      match pe with
+	      | [] ->
+		  let new_id = genid # alloc in
+		  id2ext # set new_id (Intset2.singleton x);
+		  (x1,new_id)::[]
+	      | (p,id as pid)::l ->
+		  if x1 < p then
+		    pid::aux l
+		  else if x1 = p then begin 
+		    id2ext # update id (Intset2.add x);
+		    pe end
+		  else (* x1 > p *) begin
+		    let new_id = genid # alloc in
+		    id2ext # set new_id (Intset2.singleton x);
+		    (x1,new_id)::pe end
+	    in
+	    aux
+
+	method remove : int -> t -> t =
+	  fun x ->
+	    let x1 = x / chunk in
+	    let rec aux pe =
+	      match pe with
+	      | [] -> []
+	      | (p,id as pid)::l ->
+		  if x1 < p then
+		    pid::aux l
+		  else if x1 = p then
+		    let ext = Intset2.remove x (id2ext # get id) in
+		    if Intset2.is_empty ext
+		    then begin id2ext # reset id; genid # free id; l end
+		    else begin id2ext # set id ext; pe end
+		  else (* x1 > p *)
+		    pe
+	    in
+	    aux
+
+	method inter : t -> M.t -> M.t =
+	  let rec aux e f =
+	    match e, f with
+	    | [], _ -> []
+	    | _, [] -> []
+	    | (pos,id)::l, (pos',e2')::l' ->
+		if pos > pos' then
+		  aux l f
+		else if pos = pos' then
+		  let inter_e2 = Intset2.inter (id2ext # get id) (force e2') in
+		  if Intset2.is_empty inter_e2
+		  then aux l l'
+		  else (pos,lazy_val inter_e2)::aux l l'
+		else (* pos < pos' *)
+		  aux e l'
+	  in
+	  aux
+
+
+	method as_ext : t -> M.t =
+	  List.map (fun (pos,id) -> (pos, lazy (id2ext # get id)))
+
+      end
+
+  end