Commits

Sébastien Ferré committed 740df39

Aggregations on measures (distinct count, index, sum, avg).

Comments (0)

Files changed (2)

     method refresh =
       answers <- history#current#answers;
       ignore (answers#page_top);
-      label_nb_answers#set_text (Printf.sprintf "%d answers" answers#count);
       self#refresh_aux
 
     method private refresh_aux =
 	    ();
 	let _ =
 	  List.fold_left
-	    ~f:(fun col v ->
+	    ~f:(fun (state,col) v ->
 	      let hidden = answers#get_hidden v in
 	      let order = answers#get_order v in
+	      let aggreg = answers#get_aggreg v in
 	      let menubar = GMenu.menu_bar () in
 	      let menubar_factory = new GMenu.factory menubar in
 	      let menu = menubar_factory#add_submenu v in
 	      GToolbox.build_menu menu
-		~entries:( `C ("hidden", hidden,
-			       (fun b -> answers#set_hidden v b; self#refresh_contents)) ::
-			   `S ::
-			   `R [ ("page rank", order = `PAGE_RANK,
-				 (function true -> answers#set_order v `PAGE_RANK; self#refresh_contents | _ -> ()));
-				("descending", order = `DESC,
-				 (function true -> answers#set_order v `DESC; self#refresh_contents | _ -> ()));
-				("ascending", order = `ASC,
-				 (function true -> answers#set_order v `ASC; self#refresh_contents | _ -> ())) ] ::
-			   (if col = 0
-			   then []
-			   else [`S; `I ("Move left", (fun () -> answers#move_column_left v; self#refresh_aux)) ]) );
+		~entries:( (if v = Lisql.v_count
+		            then []
+		            else `C ("hidden", hidden,
+				     (fun b -> answers#set_hidden v b; self#refresh_contents)) ::
+		                  ( match state with
+				  | `Dimensions ->
+				      `S ::
+				      `R [ ("page rank", order = `PAGE_RANK,
+					    (function true -> answers#set_order v `PAGE_RANK; self#refresh_contents | _ -> ()));
+					   ("increasing", order = `DESC,
+					    (function true -> answers#set_order v `DESC; self#refresh_contents | _ -> ()));
+					   ("decreasing", order = `ASC,
+					    (function true -> answers#set_order v `ASC; self#refresh_contents | _ -> ())) ] ::
+				      [`S]
+				  | `Measures ->
+				      `S ::
+				      `R [ ("distinct count", aggreg = `DISTINCT_COUNT,
+					    (function true -> answers#set_aggreg v `DISTINCT_COUNT; self#refresh_contents | _ -> ()));
+					   ("distribution", aggreg = `INDEX,
+					    (function true -> answers#set_aggreg v `INDEX; self#refresh_contents | _ -> ()));
+					   ("sum", aggreg = `SUM,
+					    (function true -> answers#set_aggreg v `SUM; self#refresh_contents | _ -> ()));
+					   ("average", aggreg = `AVG,
+					    (function true -> answers#set_aggreg v `AVG; self#refresh_contents | _ -> ())) ] ::
+				      [`S])) @
+			   (if col <> 0
+			   then [`I ("Move left", (fun () -> answers#move_column_left v; self#refresh_aux))]
+			   else []) @
+			   (if col <> List.length answers#columns - 1
+			   then [`I ("Move right", (fun () -> answers#move_column_right v; self#refresh_aux))]
+			   else []));
 	      self#add_persistent_widget menubar#coerce;
 	      table#attach ~left:col ~top:0 ~fill:`X menubar#coerce;
-	      col+1)
-	    ~init:0
+	      (if v = Lisql.v_count then `Measures else state), col+1)
+	    ~init:(`Dimensions,0)
 	    lv in
 	let _ =
 	  List.fold_left
-	    ~f:(fun col v ->
-	      let entry = GEdit.entry ~text:(answers#get_pattern v) () in
-	      entry#connect#changed ~callback:(fun () -> answers#set_pattern v entry#text; self#refresh_contents);
-	      self#add_persistent_widget entry#coerce;
-	      table#attach ~left:col ~top:1 ~fill:`X entry#coerce;
-	      col+1)
-	    ~init:0
+	    ~f:(fun (is_dim,col) v ->
+	      let is_dim = if v = Lisql.v_count then false else is_dim in
+	      if is_dim then begin
+		let entry = GEdit.entry ~text:(answers#get_pattern v) ~width_chars:8 () in
+		entry#connect#changed ~callback:(fun () -> answers#set_pattern v entry#text; self#refresh_contents);
+		self#add_persistent_widget entry#coerce;
+		table#attach ~left:col ~top:1 ~fill:`X entry#coerce
+	      end;
+	      (is_dim, col+1))
+	    ~init:(true,0)
 	    lv in
 	self#refresh_contents end
       else begin
       end
 
     method refresh_contents =
-      entry_start#set_text (string_of_int answers#page_start);
-      entry_end#set_text (string_of_int answers#page_end);
       List.iter ~f:(fun w -> if not (self#mem_persistent_widget w) then table#remove w) table#children;
       let _ =
 	answers#fold
 	  (fun row line ->
 	    List.fold_left
 	      ~f:(fun col name_opt ->
-		( match name_opt with
-		| None -> ()
-		| Some name ->
-		    let label_cell = GMisc.label ~text:(preview_of_name ~obs:Tarpit.blind_observer name) ~xalign:0. () in
-		    table#attach ~left:col ~top:row ~fill:`X label_cell#coerce);
+		let text =
+		  match name_opt with
+		  | None -> "-"
+		  | Some name -> preview_of_name ~obs:Tarpit.blind_observer name in
+		let label_cell = GMisc.label ~text ~xalign:0. ~yalign:0. () in
+		table#attach ~left:col ~top:row ~fill:`BOTH label_cell#coerce;
 		col+1)
 	      ~init:0
 	      line;
 	    row+1)
 	  nb_headers in
+      label_nb_answers#set_text (Printf.sprintf "%d answers" answers#count);
+      entry_start#set_text (string_of_int answers#page_start);
+      entry_end#set_text (string_of_int answers#page_end);
       ()
 
     initializer
 *)
 
 module Name = Name.Make
+module Intmap = Intmap.M
 module Ext = Intset.Intmap
 module Rel = Intreln.Intmap
 module Extension = Extension.Make
 
 (* answers *)
 
-
-type order = [ `DESC | `ASC | `PAGE_RANK ]
+type order = [ `PAGE_RANK | `DESC | `ASC ]
 type filter = [ `NONE |`REGEXPS of Str.regexp list ]
-type column = [ `HIDDEN | `SHOWN of filter * order ]
+type aggreg = [ `DISTINCT_COUNT | `INDEX | `SUM | `AVG ]
+type column = [ `HIDDEN | `COUNT | `DIMENSION of filter * order | `MEASURE of aggreg (* list *) ]
 type cube = column list
 
+let v_count = "#"
+  
 let regexp_match re s =
   try ignore (Str.search_forward re s 0); true with _ -> false
 
+let apply_filter (filter : filter) (n_opt : Name.t option) : bool =
+  match n_opt, filter with
+  | _, `NONE -> true
+  | Some (Rdf.URI uri), `REGEXPS lre -> List.for_all (fun re -> regexp_match re uri) lre
+  | Some (Rdf.Literal (str,_)), `REGEXPS lre -> List.for_all (fun re -> regexp_match re str) lre
+  | _ -> false
+
+let compare_order store order : (Name.t option * Extension.oid) -> (Name.t option * Extension.oid) -> int =
+  fun (n1_opt,_) (n2_opt,_) ->
+    match n1_opt, n2_opt with
+    | Some n1, Some n2 ->
+	( match order with
+	| `DESC -> Pervasives.compare n1 n2 (* should use some Name.compare *)
+	| `ASC -> Pervasives.compare n2 n1
+	| `PAGE_RANK -> Pervasives.compare (store#get_pagerank n2) (store#get_pagerank n1))
+    | Some _, None -> -1
+    | None, Some _ -> 1
+    | None, None -> 0
+
+let index col rel : int ref Intmap.t =
+  Rel.fold
+    (fun res l ->
+      let o = List.nth l col in
+      try
+	incr (Intmap.get o res);
+	res
+      with Not_found ->
+	Intmap.set o (ref 1) res)
+    Intmap.empty rel
+
+let sum_count store index : float * int =
+  Intmap.fold
+    (fun (sum,n) o counter ->
+      let c = !counter in
+      if o = 0 then (sum,n)
+      else begin
+	match store#get_name o with
+	| Rdf.Literal (s, dt) ->
+	    (try (sum +. (float_of_int c *. float_of_string s), n + c)
+	    with _ -> (sum,n))
+	| _ -> (sum,n)
+      end)
+    (0., 0) index
+
+let make_aggreg store g index : Name.t option =
+  match g with
+  | `INDEX ->
+      let l_index = Intmap.fold (fun res o counter -> (!counter,o)::res) [] index in
+      let sorted_l_index = List.sort (fun (c1,_) (c2,_) -> Pervasives.compare c2 c1) l_index in
+        (* decreasing count *)
+      ( match sorted_l_index with
+      | [] -> None
+      | item::items ->
+	  let buf = Buffer.create 500 in
+	  let add_item (c,o) =
+	    Buffer.add_string buf
+	      (if o = 0
+	      then Printf.sprintf "- (%d)" c
+	      else Printf.sprintf "%s (%d)"
+		  (Pretty.print_to_string
+		     (store#preview_name ~obs:Tarpit.blind_observer)
+		     (store#get_name o))
+		  c) in
+	  add_item item;
+	  List.iter (fun item -> Buffer.add_char buf '\n'; add_item item) items;
+	  Some (Name.typed_literal (Buffer.contents buf) Xsd.uri_string))
+  | `DISTINCT_COUNT ->
+      Some (Name.typed_literal (string_of_int (Intmap.cardinal index)) Xsd.uri_integer)
+  | `SUM ->
+      let sum, count = sum_count store index in
+      if count = 0
+      then None
+      else Some (Name.typed_literal (string_of_float sum) Xsd.uri_double)
+  | `AVG ->
+      let sum, count = sum_count store index in
+      if count = 0
+      then None
+      else
+	let avg = sum /. (float_of_int count) in
+	Some (Name.typed_literal (string_of_float avg) Xsd.uri_double)
+  | _ -> None
+
 let rec fold_answers store (offset : int ref) (limit : int ref) (cube : cube)
     (f : 'a -> Name.t option list -> 'a) (init : 'a) (rev_line : Name.t option list) rel : 'a =
   match cube with
       else
 	begin decr limit; f init (List.rev rev_line) end
   | `HIDDEN::sub_cube ->
-      let union_sub_rel =
-	Rel.fold_assoc
-	  (fun res _ sub_rel -> Rel.union res sub_rel)
-	  (Rel.empty (List.length sub_cube))
-	  rel in
-      fold_answers store offset limit sub_cube f init (None::rev_line) union_sub_rel
-  | `SHOWN (filter,order)::sub_cube ->
+      fold_answers store offset limit sub_cube f init (None::rev_line) rel
+  | `MEASURE g::sub_cube -> assert false
+  | `COUNT::sub_cube ->
+      if !offset > 0 then
+	begin decr offset; init end
+      else if !limit = 0 then
+	init
+      else begin
+	decr limit;
+	let n_count = Name.typed_literal (string_of_int (Rel.cardinal rel)) Xsd.uri_integer in
+	let line = fold_measures store sub_cube ((Some n_count) :: rev_line) 0 rel in
+	f init line
+      end
+  | `DIMENSION (filter,order)::sub_cube ->
       let key_names =
 	Ext.fold
 	  (fun res oid ->
 	    let n_opt = if oid = 0 then None else Some (store#get_name oid) in
-	    let ok =
-	      match n_opt, filter with
-	      | _, `NONE -> true
-	      | Some (Rdf.URI uri), `REGEXPS lre -> List.for_all (fun re -> regexp_match re uri) lre
-	      | Some (Rdf.Literal (str,_)), `REGEXPS lre -> List.for_all (fun re -> regexp_match re str) lre
-	      | _ -> false in
-	    if ok
+	    if apply_filter filter n_opt
 	    then (n_opt, oid)::res
 	    else res)
 	  [] (Rel.keys rel) in
-      let sorted_key_names = List.sort
-	  (fun (n1_opt,_) (n2_opt,_) ->
-	    match n1_opt, n2_opt with
-	    | Some n1, Some n2 ->
-		( match order with
-		| `DESC -> Pervasives.compare n1 n2 (* should use some Name.compare *)
-		| `ASC -> Pervasives.compare n2 n1
-		| `PAGE_RANK -> Pervasives.compare (store#get_pagerank n2) (store#get_pagerank n1))
-	    | Some _, None -> -1
-	    | None, Some _ -> 1
-	    | None, None -> 0)
-	  key_names in
+      let sorted_key_names = List.sort (compare_order store order) key_names in
       List.fold_left
 	(fun res (n_opt,o) ->
 	  let sub_rel = Rel.assoc o rel in
 	  fold_answers store offset limit sub_cube f res (n_opt::rev_line) sub_rel)
 	init sorted_key_names
+and fold_measures store cube rev_line col rel =
+  match cube with
+  | [] -> List.rev rev_line
+  | `HIDDEN::sub_cube ->
+      fold_measures store sub_cube (None::rev_line) col rel
+  | `MEASURE g::sub_cube ->
+      let index = index col rel in
+      let n_aggreg_opt = make_aggreg store g index in
+      fold_measures store sub_cube (n_aggreg_opt::rev_line) (col+1) rel
+  | _ -> assert false
 
-class answers store place (lv,ext : var list * Extension.t) =
+
+class answers store place (lv, ext : var list * Extension.t) =
   let rel = ext#relation (store :> Extension.store) lv in
   object (self)
     val mutable lines : Rel.t = rel
 
-    val count : int = Rel.cardinal rel
+    val mutable count : int = Rel.cardinal rel
     method count = count
 
-    val mutable columns : Extension.var list = lv
+    val mutable columns : Extension.var list = lv @ [v_count] (* v_count represents COUNT *)
     method columns = columns
     method move_column_left v =
       let state, mask, lv' =
 	List.fold_right
 	  (fun w (state,mask,lv') ->
 	    if state = 1 then
-	      if v = w
-	      then 2, true::mask, lv'
-	      else 1, false::mask, w::lv'
-	    else if state = 2 then 3, false::mask, v::w::lv'
-	    else if state = 3 then 3, true::mask, w::lv'
+	      if v = w then
+		if w = v_count then 22, mask, lv'
+		else 2, true::mask, lv'
+	      else
+		if w = v_count then 1, mask, w::lv'
+		else 1, false::mask, w::lv'
+	    else if state = 2 then
+	      if w = v_count then 33, mask, v::w::lv'
+	      else 3, false::mask, v::w::lv'
+	    else if state = 22 then 33, false::mask, v::w::lv'
+	    else if state = 3 then
+	      if w = v_count then 3, mask, w::lv'
+	      else 3, true::mask, w::lv'
+	    else if state = 33 then 33, true::mask, w::lv'
 	    else assert false)
 	  columns (1,[],[]) in
-      if state = 3 then begin (* otherwise, v missing or first column *)
+      if state = 3 then begin (* otherwise, v missing or first column or only # moved *)
+	columns <- lv';
+	lines <- Rel.group_by mask lines end
+      else if state = 33 then begin
+	columns <- lv'
+      end
+    method move_column_right v =
+      let state, mask, lv' = 
+	List.fold_right
+	  (fun w (state,mask,lv') ->
+	    if state = `Last then
+	      if w = v then
+		if w = v_count then `Nop, mask, w::lv'
+		else `Nop, false::mask, w::lv'
+	      else
+		if w = v_count then `Search, mask, w::lv'
+		else `Search, false::mask, w::lv'
+	    else if state = `Search then
+	      if w = v then
+		if w = v_count then `Nop, mask, (List.hd lv')::w::(List.tl lv')
+		else if List.hd lv' = v_count then `Nop, true::mask, (List.hd lv')::w::(List.tl lv')
+		else `Found, false::true::(List.tl mask), (List.hd lv')::w::(List.tl lv')
+	      else
+		if w = v_count then `Search, mask, w::lv'
+		else `Search, false::mask, w::lv'
+	    else if state = `Found then
+	      if w = v_count then `Found, mask, w::lv'
+	      else `Found, true::mask, w::lv'
+	    else if state = `Nop then
+	      if w = v_count then `Nop, mask, w::lv'
+	      else `Nop, true::mask, w::lv'
+	    else assert false)
+	  columns (`Last,[],[]) in
+      if state = `Found then begin (* otherwise, v missing or first column or only # moved *)
 	columns <- lv';
-	lines <- Rel.group_by mask lines
+	lines <- Rel.group_by mask lines end
+      else if state = `Nop then begin
+	columns <- lv'
       end
 
     val mutable page_start = 1
     val mutable page_size = 10
     method page_start = page_start
-    method page_end = min (page_start + page_size - 1) count
+    method page_end = min (page_start + page_size - 1) self#count
     method page_size = page_size
-    method page_pos pos = min (max 1 pos) count
+    method page_pos pos = min (max 1 pos) self#count
 
     method page_down : bool =
-      if self#page_end < count
+      if self#page_end < self#count
       then begin page_start <- page_start + page_size; true end
       else false
 
       else false
 
     method page_bottom : bool =
-      let pos = ((count - 1) / page_size) * page_size + 1 in
+      let pos = ((self#count - 1) / page_size) * page_size + 1 in
       if pos <> page_start
       then begin page_start <- pos; true end
       else false
       | "" -> `NONE
       | s -> `REGEXPS (List.map Str.regexp_string_case_fold (Str.split (Str.regexp "[ '\",.;]+") s))
 
+    val h_aggreg : (var, aggreg) Hashtbl.t = Hashtbl.create 11
+    method get_aggreg v : aggreg = try Hashtbl.find h_aggreg v with _ -> `DISTINCT_COUNT
+    method set_aggreg v (g : aggreg) =
+      match g with
+      | `DISTINCT_COUNT -> Hashtbl.remove h_aggreg v
+      | _ -> Hashtbl.replace h_aggreg v g
 
     method fold : 'a. ('a -> Name.t option list -> 'a) -> 'a -> 'a =
       fun f init ->
 	let offset = page_start - 1 in
 	let limit = page_size in
-	let cube =
-	  List.map
-	    (fun v ->
-	      if self#get_hidden v
-	      then `HIDDEN
-	      else `SHOWN (self#get_filter v, self#get_order v))
-	    columns in
-	fold_answers store (ref offset) (ref limit) cube f init [] lines
+	let rel = Rel.project
+	    (List.fold_right
+	       (fun v mask ->
+		 if v = v_count then mask
+		 else not (self#get_hidden v) :: mask)
+	       columns [])
+	    lines in
+	let _, mask_dims =
+	  List.fold_right
+	    (fun v (state,mask) ->
+	      if v = v_count then `Dimensions, mask
+	      else if state = `Measures then state, false::mask
+	      else if state = `Dimensions then state, (not (self#get_hidden v))::mask
+	      else assert false)
+	    columns (`Measures,[]) in
+	let rel_dims = Rel.project mask_dims lines in
+	count <- Rel.cardinal rel_dims;
+	let _, cube =
+	  List.fold_right
+	    (fun v (state,cube) ->
+	      if v = v_count then `Dimensions, `COUNT :: cube
+	      else if self#get_hidden v then state, `HIDDEN :: cube
+	      else 
+		match state  with
+		| `Measures -> state, `MEASURE (self#get_aggreg v) :: cube
+		| `Dimensions -> state, `DIMENSION (self#get_filter v, self#get_order v) :: cube)
+	    columns (`Measures,[]) in
+	fold_answers store (ref offset) (ref limit) cube f init [] rel
 
   end
 
       | Rdf.Literal (s, Rdf.Typed t) :: _ when t = Namespace.uri_Assertion ->
 	  let a = Syntax.assertion_of_string s in
 	  focus_of_s a
-      | _ -> focus_top
+      | _ -> focus_what
 
 
     val h_ext_views : (p1, Ext.t Tarpit.view) Hashtbl.t = Hashtbl.create 13