PReach / diskfilter.erl

-module(diskfilter).
-export([open/2, keymerge/3, nkeymerge/5]).

-record(q,
 	{ name,      % filename
	  max,       % cache size
	  fd,        
 	  bl = [],   % block list
 	  il = [],   % index list
 	  ql = []   % quick list
	 } ).

open(Name, BlockSize) ->
    {ok, FD} = file:open(Name, [raw, binary, read, write, read_ahead, delayed_write]),
    #q{name = Name, max = BlockSize, fd = FD}.

find_block(BL) ->
    if BL == [] ->
	    0;
       true ->
	    {Last, LL} = lists:last(BL),
	    Last + LL
    end.

keymerge(Q=#q{bl=BL, il=_, fd=FD, max=BlockSize, name=Name, ql=_}, L, NewFun) ->
    {ok, FD2} = file:open(Name ++ ".tmp", [raw, binary, read, write, read_ahead, delayed_write]),
    {Added, NewBL, NewIL} = keymerge(BL, L, [], NewFun, [], [], [], FD, FD2, BlockSize),
    file:close(FD),
    file:delete(Name),
    file:close(FD2),
    file:rename(Name ++ ".tmp", Name),
    {ok, FD3} = file:open(Name, [raw, binary, read, write, read_ahead, delayed_write]),    
    {Q#q{fd=FD3, bl=NewBL, il=NewIL}, Added}.


keymerge([], [], [], _F, Added, NewBL, NewIL, _FD, _FD2, _CacheSize) ->
    {Added, NewBL, NewIL};

keymerge([{Begin, Length}|BL], L, L2, F, Added, NewBL, NewIL, FD, FD2, CacheSize)
  when length(L2) < CacheSize ->
    {ok, Bin} = file:pread(FD, Begin, Length),
    L22 = L2 ++ binary_to_term(Bin),
    keymerge(BL, L, L22, F, Added, NewBL, NewIL, FD, FD2, CacheSize);

keymerge(BL, L, L2, F, Added, NewBL, NewIL, FD, FD2, CacheSize) ->
    {NewL2, NewL, NewBlock, LastEl, L2Added} = nkeymerge(CacheSize, L2, L, F, fun id/1),
    NewBin = term_to_binary(NewBlock),
    NewLen = size(NewBin),
    NewBegin = find_block(NewBL),
    ok = file:pwrite(FD2, NewBegin, NewBin),
    keymerge(BL, NewL, NewL2, F, L2Added ++ Added, NewBL ++ [{NewBegin, NewLen}],
	     NewIL ++ [{hd(NewBlock), LastEl}], FD, FD2, CacheSize).

id(X) -> X.

%% merge n keys from x and y
%% return the remainder of the two lists along with the merged list
%% F is applied to items in Y before they are added to the list
%% we accumulate items added to the merged list from Y in A

nkeymerge(N, X, Y, F, F2) ->
    nkeymerge(N, X, Y, [], [], F, F2).


nkeymerge(0, X, Y, L, Added, _F, _) -> {X, Y, lists:reverse(L), case L of [H|_T] -> H; [] -> [] end, Added};

nkeymerge(_N, [], [], L, Added, _F, _) -> {[], [], lists:reverse(L), case L of [H|_T] -> H; [] -> [] end, Added};

nkeymerge(N, [], [Y1 | Y], L, Added, F, F2) ->
    nkeymerge(N-1, [], Y, [F(Y1) | L], [Y1 | Added], F, F2);

nkeymerge(N, [A | X], [], L, Added, F, F2) ->
    nkeymerge(N-1, X, [], [F2(A) | L], Added, F, F2);

nkeymerge(N, [A | X], [Y1 | Y], L, Added, F, F2) ->
    KY = F(Y1),
    KA = F2(A),
    if KA < KY ->
	    nkeymerge(N-1, X, [Y1 | Y], [F2(A) | L], Added, F, F2);
       KA > KY ->
	    nkeymerge(N-1, [F2(A) | X], Y, [(F(Y1)) | L], [Y1 | Added], F, F2);
       true ->
	    nkeymerge(N-1, X, Y, [F2(A) | L], Added, F, F2)
    end.
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.