Commits

Sébastien Ferré committed fe147dc

simplifications about the definition of links.

  • Participants
  • Parent commits 0c4d665

Comments (0)

Files changed (1)

File suffix_tree.ml

-let taille_alphabet = 4
-
 
 (* type of nodes in suffix trees *)
 type node = {
     seqid : int; (* sequence index in which the positions start and final are defined *)
     mutable start : int;     (* start and final position of the word labelling the node *)
     final : int ref;
-    mutable link : node option; (* suffix link *)
+    mutable link : node; (* suffix link *)
     v : node_value
   }
 and node_value =
       (* for the key '\000', all values are relevant (use Hashtbl.find_all) *)
   | Index of int (* for leaves: position of recognized suffix *)
 
-
 (* type of suffix trees *)
 type t = string array * node
 
 (* the initial root node *)
 let empty : unit -> node =
-  fun () -> {seqid= -1; start=0; final=ref (-1); link=None; v=Children (Hashtbl.create taille_alphabet)}
+  fun () ->
+    let rec root = {seqid= -1; start=0; final=ref (-1); link=root; v=Children (Hashtbl.create 2)} in
+    root
 
 
 (* --------------------------------------------------------------------------------
    creation of new nodes and leaves
    -------------------------------- *)
 
-let add_leaf seqar node seqid start final_ref index =
+let add_leaf (seqar,root) node seqid start final_ref index =
   match node.v with
   | Children h ->
       Hashtbl.add h
 	seqar.(seqid).[start]
-	{seqid=seqid; start=start; final=final_ref; link=None; v=(Index index)}
+	{seqid=seqid; start=start; final=final_ref; link=root; v=(Index index)}
   | Index _ -> raise (Invalid_argument "Suffix_tree.add_leaf: 2nd argument must not be a leaf")
 
 (* make explicit an implicit node by inserting a new node between [explicit] and [child] *)
-let insert_node seqar (explicit,implicit,child) =
+let insert_node (seqar,root) (explicit,implicit,child) =
   let a = subseq_length implicit in
   if a = 0
   then explicit
 	  seqid = child.seqid;
 	  start = child.start;
 	  final = ref (child.start+a-1);
-	  link = None;
+	  link = root;
 	  v = Children (let h' = Hashtbl.create (Hashtbl.length h) in Hashtbl.add h' c_child_new child; h')
 	} in
 	child.start <- child.start+a;
 
 (* add a suffix link from [pred_opt] (if defined) to [explicit] *)
 let add_link root pred_opt explicit =
-  if explicit != root then (* create a new suffix link *)
+  (*if explicit != root then*) (* create a new suffix link *)
     match pred_opt with
-    | Some n -> if n.link = None then n.link <- Some explicit
+    | Some n -> (*if n.link = None then*) n.link <- explicit
     | None -> ()
 
 (* ------------ 
    ------------ *)
 
 (* get the node refered by the suffix link at [n] *)
+(*
 let suffix_link (root : node) (n : node) : node =
   match n.link with
   | None -> root  (* by default, the suffix link points to the root node *)
   | Some n' -> n'
+*)
 
 (* extend suffix_link for implicit nodes *)
 let link (seqar,root) = function  (* TODO *)
   | (explicit,implicit,_) when subseq_is_empty implicit ->
-      let explicit' = suffix_link root explicit in
+      let explicit' = explicit.link (*suffix_link root explicit*) in
       (explicit', subseq_empty, explicit')
   | (explicit,implicit,_) ->
       if explicit == root
 	let implicit' = subseq_sub implicit 1 (subseq_length implicit - 1) in
 	canonical seqar (root, implicit', get_child seqar (root,implicit'))
       else
-	let explicit' = suffix_link root explicit in
+	let explicit' = explicit.link (*suffix_link root explicit*) in
 	canonical seqar (explicit', implicit, get_child seqar (explicit',implicit))
 
 (* --------------------------------------------------------------
       add_link root pred_opt explicit;
       res.startnode <- canonical seqar extended_startnode
   | None -> (* startnode cannot be extended by [c] ... *)
-      let n' = insert_node seqar res.startnode in (* ... so we insert a new node ... *)
+      let n' = insert_node (seqar,root) res.startnode in (* ... so we insert a new node ... *)
       add_link root pred_opt n';  (* ... a suffix link from the last created node (if defined) ... *)
       if seqar.(k).[res.startj] <> '\000' then
-	add_leaf seqar n' k i res.terminal res.startj;  (* ... and a new leaf for the suffix at position [res.startj] *)
+	add_leaf (seqar,root) n' k i res.terminal res.startj;  (* ... and a new leaf for the suffix at position [res.startj] *)
       res.startj <- res.startj + 1; (* prepare for the next suffix *)
       if not (is_root root res.startnode)
       then begin (* while [res.startnode] is not the root, and cannot be extended by [c] ... *)
   | Index i -> (node.seqid, i)
 
 let linked_node (seqar,root : t) (n : node) : node =
-  suffix_link root n
+  n.link (*suffix_link root n*)
 
 let rec implicit_node (seqar,node : t) (word : string) =
   let (explicit, (s,i,len), child) = implicit_node_aux (seqar,node) (word,0,String.length word) in
       if l=[]
       then Leaf (w, index gst n)
       else Node (w, l))
+
+(* applications of suffix trees *)
+
+let exact_matches : t -> string -> (int * int) list =
+  fun gst word ->
+    try
+      let explicit, implicit, child = implicit_node gst word in
+    fold_s_node gst
+	(fun l n -> if l=[] then [index gst n] else List.concat l)
+	child
+    with Not_found -> []
+