Source

ocaml-lib / persindex.ml

Full commit
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417

module Serialize =
  struct
    class type ['a] t =
      object
	method to_string : 'a -> string
	method from_string : string -> 'a
      end

    let marshal =
      object
	method to_string x = Marshal.to_string x []
	method from_string s = Marshal.from_string s 0
      end

    let unit : unit t =
      object
	method to_string () = ""
	method from_string s = ()
      end

    let int : int t =
      object
	method to_string = string_of_int
	method from_string = int_of_string
      end

    let string : string t =
      object
	method to_string s = s
	method from_string s = s
      end

    let index (create : unit -> ('a,'b) Index.index) : ('a,'b) Index.index t =
      object
	method to_string idx = idx # to_string
	method from_string s =
	  let idx = create () in
	  idx # from_string s;
	  idx
      end

(*
    let obj =
      object
	method to_string x = (x # to_string : string)
	method from_string x (s : string) = x # from_string s; x
      end
*)
  end


(** Prototype of databases. *)
class database =
  object (self)
    method locate : string -> string -> unit =
      fun table field -> ()
	  (** [locate t f] makes sure the database contains a table [t] with field [f]. *)
    method get : string -> string -> string -> string =
      fun table field key -> raise Not_found
	  (** [get t f k] returns the field [f] associated to key [k] in table [t]. Raises [Not_found] if there is no such value. *)
    method set : string -> string -> string -> string -> unit =
      fun table field key data -> ()
	  (** [set t f k d] sets the field [f] associated to key [k] in table [t] as data [d]. *)
    method reset : string -> string -> string -> unit =
      fun table field key -> ()
	  (** [reset t f k] removes the field [f] associated to key [k] in table [t]. *)
    method close : unit = ()
	(** [close] closes the database. *)
  end

(** Implementation of a database with GDBM. *)
class dbm (file : string) =
  let string_of_key (t,f,k) = String.concat "-" [t; f; k] in
  object (self)
    inherit database
	
    val db = Dbm.opendbm file [Dbm.Dbm_create; Dbm.Dbm_rdwr] 0o664

    method get t f k = Dbm.find db (string_of_key (t,f,k))
	
    method set t f k v = Dbm.replace db (string_of_key (t,f,k)) v
	
    method reset t f k = try Dbm.remove db (string_of_key (t,f,k)) with _ -> ()
	
    method close = Dbm.close db
  end

(** Persistent version of an index (in memory) w.r.t. some database.
    The index then plays the role of a cache for the database. *)
class ['a,'b] index ?(keys : 'a Serialize.t = Serialize.marshal) ?(vals : 'b Serialize.t = Serialize.marshal) (idx : ('a,'b) Index.index) (init : 'a -> 'b) (db : database) =
  object (self)
    val mutable table = ""
    val mutable field = ""
    val mutable modified : 'a Bintree.t = Bintree.empty

    method locate : string -> string -> string -> unit =
      (** [locate d s n] locates this index as the field of a table of the database.
	 - [d] specifies the domain of keys, hence the table
	 - [n] specifies the name of the index, under some scope [s], which compose the field *)
      fun domain scope name ->
	table <- domain;
	field <- String.concat ":" [scope; name];
	db # locate table field

    method sync : unit =
      (** [sync] forces the synchronization of the database w.r.t. the cache. *)
      assert (not (List.mem "" [table; field]));
      Bintree.iter
	(fun k ->
	  try
	    let v = idx # get k in
	    db # set table field (keys # to_string k) (vals # to_string v)
	  with Not_found ->
	    db # reset table field (keys # to_string k))
	modified;
      modified <- Bintree.empty

    method unload : int -> unit =
      (** [unload p] randomly unloads p% of index entries from memory into the database. *)
      fun p ->
	idx # iter
	  (fun k v ->
	    if Random.int 100 < p then begin
	      db # set table field (keys # to_string k) (vals # to_string v);
	      idx # reset k;
	      modified <- Bintree.remove k modified
	    end)

    method get : 'a -> 'b =
      (** [get k] retrieves the value associated to the key [k].
	 Firstly in the cache, then in the database.
	 In the latter case, the value is added into the cache. *)
      fun k ->
	try idx # get k
	with Not_found ->
	  let v =
	    try vals # from_string (db # get table field (keys # to_string k))
	    with Not_found -> init k in
	  idx # set k v;
	  modified <- Bintree.add k modified;
	  v

    method set : 'a -> 'b -> unit =
      (** [set k v] associates the value [v] to the key [k].
	  Accesses to the database are delayed until [sync] or [unload] is called. *)
      fun k v ->
	idx # set k v;
	modified <- Bintree.add k modified

    method update : 'a -> ('b -> 'b) -> unit =
      (** [update k f] updates the current value of [k] by applying the function [f].
	  Assumes the current value already exists.
          Write accesses to the database are delayed. *)
      fun k f ->
	let v =
	  try idx # get k
	  with Not_found ->
	    try vals # from_string (db # get table field (keys # to_string k))
	    with Not_found -> init k in
	idx # set k (f v);
	modified <- Bintree.add k modified
(*
	let v' = f (self # get k) in
	self # set k v'
*)

    method reset : 'a -> unit =
      (** [reset k] removes the entry associated to the key [k], if it exists.
	  Accesses to the database are delayed. *)
      fun k ->
	idx # reset k;
	modified <- Bintree.add k modified
  end

let init_fail = fun _ -> raise Not_found

class ['a,'b] hashtbl ?keys ?vals (size : int) (init : 'a -> 'b) = ['a,'b] index ?keys ?vals (new Index.hashtbl size init_fail) init

class ['a] vector ?vals (size : int) (null : 'a) (init : int -> 'a) = [int,'a] index ~keys:Serialize.int ?vals (new Index.vector size null init_fail) init

class ['a] vector_opt ?vals (size : int) (init : int -> 'a) = [int,'a] index ~keys:Serialize.int ?vals (new Index.vector_opt size init_fail) init

class ['a] var ?vals (null : 'a) (init : unit -> 'a) = [unit,'a] index ~keys:Serialize.unit ?vals (new Index.var null init_fail) init

class ['a] var_opt ?vals (init : unit -> 'a) = [unit,'a] index ~keys:Serialize.unit ?vals (new Index.var_opt init_fail) init

class ['a] varray ?vals (size : int) (null : 'a) (init : int -> 'a) = [int,'a] index ~keys:Serialize.int ?vals (new Index.varray size null init_fail) init

class ['a] varray_opt ?vals (size : int) (init : int -> 'a) = [int,'a] index ~keys:Serialize.int ?vals (new Index.varray_opt size init_fail) init


class ['a,'b] index_atom (idx0 : ('a,'b) Index.index) (db : database) =
  let key = "" in
  object (self)
    val mutable table = ""
    val mutable field = ""
    val mutable idx : ('a,'b) Index.index = idx0

    method locate : string -> string -> string -> unit =
      fun domain scope name ->
	table <- scope;
	field <- name;
	try idx # from_string (db # get table field key)
	with Not_found -> db # locate table field

    method sync : unit =
      assert (not (List.mem "" [table; field]));
      db # set table field key (idx # to_string)

    method unload : int -> unit =
      fun p -> ()

    method get = idx # get

    method set = idx # set

    method update = idx # update

    method reset = idx # reset
  end

class ['a,'b] hashtbl_atom (size : int) (init : 'a -> 'b) = ['a,'b] index_atom (new Index.hashtbl size init)

class ['a] vector_atom (size : int) (null : 'a) (init : int -> 'a) = [int,'a] index_atom (new Index.vector size null init)

class ['a] vector_opt_atom (size : int) (init : int -> 'a) = [int,'a] index_atom (new Index.vector_opt size init)

class ['a] var_atom (null : 'a) (init : unit -> 'a) = [unit,'a] index_atom (new Index.var null init)

class ['a] var_opt_atom (init : unit -> 'a) = [unit,'a] index_atom (new Index.var_opt init)

class ['a] varray_atom (size : int) (null : 'a) (init : int -> 'a) = [int,'a] index_atom (new Index.varray size null init)

class ['a] varray_opt_atom (size : int) (init : int -> 'a) = [int,'a] index_atom (new Index.varray_opt size init)


class ['a,'a1,'a2,'b] index2
    ?(keys : 'a1 Serialize.t = Serialize.marshal)
    ?(vals : 'b Serialize.t = Serialize.marshal) (* for compatiblity with other classes *)
    (key12 : 'a -> 'a1 * 'a2)
    (idx : ('a1, ('a2,'b) Index.index) Index.index)
    (init : 'a1 -> ('a2,'b) Index.index)
    (db : database) : ['a,'b] index =
  object (self)
    val mutable table = ""
    val mutable field = ""
    val mutable modified : 'a1 Bintree.t = Bintree.empty

    method locate : string -> string -> string -> unit =
      (** [locate d s n] locates this index as the field of a table of the database.
	 - [d] specifies the domain of keys, hence the table
	 - [n] specifies the name of the index, under some scope [s], which compose the field *)
      fun domain scope name ->
	table <- domain;
	field <- String.concat ":" [scope; name];
	db # locate table field

    method sync : unit =
      (** [sync] forces the synchronization of the database w.r.t. the cache. *)
      assert (not (List.mem "" [table; field]));
      Bintree.iter
	(fun k1 ->
	  try
	    let idx2 = idx # get k1 in
	    db # set table field (keys # to_string k1) (idx2 # to_string)
	  with Not_found ->
	    db # reset table field (keys # to_string k1))
	modified;
      modified <- Bintree.empty

    method unload : int -> unit =
      (** [unload p] randomly unloads p% of index entries from memory into the database. *)
      fun p ->
	idx # iter
	  (fun k1 idx2 ->
	    if Random.int 100 < p then begin
	      db # set table field (keys # to_string k1) (idx2 # to_string);
	      idx # reset k1;
	      modified <- Bintree.remove k1 modified
	    end)

    method private get2 : 'a1 -> ('a2,'b) Index.index =
      fun k1 ->
	try idx # get k1
	with Not_found ->
	  let idx2 = init k1 in
	  (try idx2 # from_string (db # get table field (keys # to_string k1)) with Not_found -> ());
	  idx # set k1 idx2;
	  modified <- Bintree.add k1 modified;
	  idx2

    method get : 'a -> 'b =
      (** [get k] retrieves the value associated to the key [k].
	 Firstly in the cache, then in the database.
	 In the latter case, the value is added into the cache. *)
      fun k ->
	let k1, k2 = key12 k in
	(self # get2 k1) # get k2

    method set : 'a -> 'b -> unit =
      (** [set k v] associates the value [v] to the key [k].
	  Accesses to the database are delayed until [sync] or [unload] is called. *)
      fun k v ->
	let k1, k2 = key12 k in
	(self # get2 k1) # set k2 v;
	modified <- Bintree.add k1 modified

    method update : 'a -> ('b -> 'b) -> unit =
      (** [update k f] updates the current value of [k] by applying the function [f].
	  Assumes the current value already exists.
          Write accesses to the database are delayed. *)
      fun k f ->
	let k1, k2 = key12 k in
	(self # get2 k1) # update k2 f;
	modified <- Bintree.add k1 modified
(*
	let v' = f (self # get k) in
	self # set k v'
*)

    method reset : 'a -> unit =
      (** [reset k] removes the entry associated to the key [k], if it exists.
	  Accesses to the database are delayed. *)
      fun k ->
	let k1, k2 = key12 k in
	(self # get2 k1) # reset k2;
	modified <- Bintree.add k1 modified
  end

let chunk = 2048 (* seems the best value for size2 *)

class ['a] varray_vector ?vals (size1 : int) (size2 : int) (null : 'a) (init : int -> 'a) (db : database) =
  let create () = new Index.hashtbl size2 (fun (k1,k2) -> init (k1 * size2  +  k2)) in
  let init1 = fun k1 -> create () in
  [int,int,int,'a] index2
    ~keys:Serialize.int
    ?vals
    (fun k -> k / size2, k mod size2)
    (new Index.varray_opt size1 (fun _ -> raise Not_found))
    (fun k1 ->
      let k0 = k1 * size2 in
      new Index.vector size2 null (fun k2 -> init (k0 + k2)))
    db

class ['a] varray_vector_opt ?vals (size1 : int) (size2 : int) (init : int -> 'a) (db : database) =
  [int,int,int,'a] index2
    ~keys:Serialize.int
    ?vals
    (fun k -> k / size2, k mod size2)
    (new Index.varray_opt size1 (fun _ -> raise Not_found))
    (fun k1 ->
      let k0 = k1 * size2 in
      new Index.vector_opt size2 (fun k2 -> init (k0 + k2)))
    db

(* A REVOIR

class ['a,'a1,'a2,'b] index2
    (key12 : 'a -> 'a1 * 'a2)
    (pidx : ('a1, ('a1 * 'a2,'b) Index.index) index)
    (db : database) =
  object (self)

    method locate = pidx # locate

    method sync = pidx # sync

    method unload = pidx # unload

    method get : 'a -> 'b =
      fun k ->
	let k1, k2 = key12 k in
	(pidx # get k1) # get (k1,k2)

    method set : 'a -> 'b -> unit =
      fun k v ->
	let k1, k2 = key12 k in
	let v1 = pidx # get k1 in
	v1 # set (k1,k2) v;
	pidx # set k1 v1 (* to notify pidx of the change on v1 *)

    method update : 'a -> ('b -> 'b) -> unit =
      fun k f ->
	let v' = f (self # get k) in
	self # set k v'

    method reset : 'a -> unit =
      fun k ->
	let k1, k2 = key12 k in
	let v1 = pidx # get k1 in
	v1 # reset (k1,k2);
	pidx # set k1 v1 (* to notify pidx of the change on v1 *)
  end


let chunk = 2048 (* seems the best value for size2 *)

class ['a] hashtbl_hashtbl (size1 : int) (size2 : int) (null : 'a) (init : int -> 'a) (db : database) =
  let create () = new Index.hashtbl size2 (fun (k1,k2) -> init (k1 * size2  +  k2)) in
  let init1 = fun k1 -> create () in
  [int,int,int,'a] index2
    (fun k -> k / size2, k mod size2)
    (new hashtbl ~keys:Serialize.int ~vals:(Serialize.index create) (* obj *) size1 init1 db)
    db

class ['a] hashtbl_vector_opt (size1 : int) (size2 : int) (init : int -> 'a) (db : database) =
  let init1 =
    fun k1 ->
      let k0 = k1 * size2 in
      new Index.vector_opt size2 (fun k2 -> init (k0 + k2)) in
  [int,int,int,'a] index2
    (fun k -> k / size2, k mod size2)
    (new hashtbl ~keys:Serialize.int ~vals:Serialize.marshal (* obj *) size1 init1 db)
    db

 A REVOIR *)