Commits

Anonymous committed 3754e9c

balancing, n-run-guarantee-ing partition claim function

Comments (0)

Files changed (1)

src/riak_claim.erl

 
 -module(riak_claim).
 -export([default_wants_claim/1, default_choose_claim/1,
-         never_wants_claim/1]).
+         never_wants_claim/1, random_choose_claim/1]).
 
 -include_lib("eunit/include/eunit.hrl").
 
 %% @spec default_choose_claim(riak_ring()) -> riak_ring()
 %% @doc Choose a partition at random.
 default_choose_claim(Ring) ->
+    TargetN = riak:get_app_env(target_n_val, 3),
+    case meets_target_n(Ring, TargetN) of
+        {true, TailViolations} ->
+            %% if target N is met, then it doesn't matter where
+            %% we claim vnodes, as long as we don't violate the
+            %% target N with any of our additions
+            %% (== claim partitions at least N steps apart)
+            claim_with_n_met(Ring, TailViolations);
+        false ->
+            %% we don't meet target N yet, rebalance
+            claim_rebalance_n(Ring)
+    end.
+
+meets_target_n(Ring, TargetN) ->
+    Owners = lists:keysort(1, riak_ring:all_owners(Ring)),
+    meets_target_n(Owners, TargetN, 0, [], []).
+meets_target_n([{Part,Node}|Rest], TargetN, Index, First, Last) ->
+    case lists:keytake(Node, 1, Last) of
+        {value, {Node, LastIndex, _}, NewLast} ->
+            if Index-LastIndex >= TargetN ->
+                    %% node repeat respects TargetN
+                    meets_target_n(Rest, TargetN, Index+1, First,
+                                   [{Node, Index, Part}|NewLast]);
+               true ->
+                    %% violation of TargetN
+                    false
+            end;
+        false ->
+            %% haven't seen this node yet
+            meets_target_n(Rest, TargetN, Index+1,
+                           [{Node, Index}|First], [{Node, Index, Part}|Last])
+    end;
+meets_target_n([], TargetN, Index, First, Last) ->
+    %% start through end guarantees TargetN
+    %% compute violations at wrap around, but don't fail
+    %% because of them: handle during reclaim
+    Violations = 
+        lists:filter(fun({Node, L, _}) ->
+                             {Node, F} = proplists:lookup(Node, First),
+                             (Index-L)+F < TargetN
+                     end,
+                     Last),
+    {true, [ Part || {_, _, Part} <- Violations ]}.
+
+claim_with_n_met(Ring, TailViolations) ->
+    CurrentOwners = lists:keysort(1, riak_ring:all_owners(Ring)),
+    Node = node(),
+    Nodes = lists:usort([Node|riak_ring:all_members(Ring)]),
+    case lists:sort([ I || {I, N} <- CurrentOwners, N == Node ]) of
+        [] ->
+            %% node hasn't claimed anything yet - just claim stuff
+            Spacing = length(Nodes),
+            [{First,_}|OwnList] =
+                case TailViolations of
+                    [] ->
+                        %% no wrap-around problems - choose whatever
+                        lists:nthtail(Spacing-1, CurrentOwners);
+                    [TV|_] ->
+                        %% attempt to cure a wrap-around problem
+                        tl(lists:dropwhile(
+                             fun({I, _}) -> I /= TV end,
+                             lists:reverse(CurrentOwners)))
+                end,
+            {_, NewRing} = lists:foldl(
+                             fun({I, _}, {0, Acc}) ->
+                                     {Spacing, riak_ring:transfer_node(I, Node, Acc)};
+                                (_, {S, Acc}) ->
+                                     {S-1, Acc}
+                             end,
+                             {Spacing, riak_ring:transfer_node(First, Node, Ring)},
+                             OwnList),
+            NewRing;
+        Mine ->
+            %% node already has claims - respect them
+            %% pick biggest hole & sit in the middle
+            %% rebalance will cure any mistake on the next pass
+            claim_hole(Ring, Mine, CurrentOwners)
+    end.
+
+claim_hole(Ring, Mine, Owners) ->
+    Choices = case find_biggest_hole(Mine) of
+                  {I0, I1} when I0 < I1 ->
+                      %% start-middle of the ring
+                      lists:takewhile(
+                        fun({I, _}) -> I /= I1 end,
+                        tl(lists:dropwhile(
+                             fun({I, _}) -> I /= I0 end,
+                             Owners)));
+                  {I0, I1} when I0 > I1 ->
+                      %% wrap-around end-start of the ring
+                      tl(lists:dropwhile(
+                           fun({I, _}) -> I /= I0 end, Owners))
+                          ++lists:takewhile(
+                              fun({I, _}) -> I /= I1 end, Owners);
+                  {I0, I0} ->
+                      %% node only has one claim
+                      {Start, End} = 
+                          lists:splitwith(
+                            fun({I, _}) -> I /= I0 end,
+                            Owners),
+                      tl(End)++Start
+              end,
+    Half = length(Choices) div 2,
+    {I, _} = lists:nth(Half, Choices),
+    riak_ring:transfer_node(I, node(), Ring).
+
+find_biggest_hole(Mine) ->
+    lists:foldl(fun({I0, I1}, none) ->
+                        {I0, I1};
+                   ({I0, I1}, {C0, C1}) when I0 < I1->
+                        %% start-middle of the ring
+                        if I1-I0 > C1-C0 ->
+                                {I0, I1};
+                           true ->
+                                {C0, C1}
+                        end;
+                   ({I0, I1}, {C0, C1}) ->
+                        %% wrap-around end-start of the ring
+                        Span = I0+trunc(math:pow(2, 160))-1,
+                        if Span > C1-C0 ->
+                                {I0, I1};
+                           true ->
+                                {C0, C1}
+                        end
+                end,
+                none,
+                lists:zip(Mine, tl(Mine)++[hd(Mine)])).
+
+claim_rebalance_n(Ring) ->
+    %% diagonal stripes guarantee most disperse data
+    Nodes = lists:usort([node()|riak_ring:all_members(Ring)]),
+    Partitions = lists:sort([ I || {I, _} <- riak_ring:all_owners(Ring) ]),
+    Zipped = lists:zip(Partitions,
+                       lists:sublist(
+                         lists:flatten(
+                           lists:duplicate(
+                             1+(length(Partitions) div length(Nodes)),
+                             Nodes)),
+                         1, length(Partitions))),
+    lists:foldl(fun({P, N}, Acc) ->
+                        riak_ring:transfer_node(P, N, Acc)
+                end,
+                Ring,
+                Zipped).
+
+random_choose_claim(Ring) ->
     riak_ring:transfer_node(riak_ring:random_other_index(Ring),
-                              node(), Ring).
+                            node(), Ring).
 
 %% @spec never_wants_claim(riak_ring()) -> no
 %% @doc For use by nodes that should not claim any partitions.