neilconway / overlog-paxos
A clean implementation of the Paxos consensus protocol in Overlog, a language for distributed computing.
Clone this repository (size: 4.5 MB): HTTPS / SSH
$ hg clone http://bitbucket.org/neilconway/overlog-paxos/
| commit 29: | 7ba2c54468af |
| parent 28: | 8e5f83fc7ce8 |
| branch: | default |
| tags: | tip |
comments for source and sink
4 months ago
| r29:7ba2c54468af | 188 loc | 5.3 KB | embed / history / annotate / raw / |
|---|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | program paxos_prepare;
import java.lang.System;
// inserted into from the calling program.
define(start, keys(0), {String});
/**** local_aru block ******/
/*
ARU, or "all received upto", holds the value of the next globally ordered
update we are ready to propose or write. It should be initialized from
stable storage: below, we initialize it to zero because we have none.
When we successfully update our global history, we may increment the ARU.
we do this via an aggregation to force a stratification boundary.
*/
define(local_aru, keys(0), {
String, // host
Integer // local ARU value
});
public
local_aru(Me, 0) :-
paxos::self(Me),
start(),
notin global_history(Me, _, _, _);
/**************************/
/*
* When a node wins leader election, multicast a prepare message to
* the Paxos group containing the new view number and our local ARU.
* The common wisdom says that the prepare phase of Paxos enforces
* all the safety guarantees: an unreliable leader can at worst impact
* liveness. This is not true in this implementation, because the
* leader election module produces our View number. The following
* safety constraints must hold over leader::leader(_, _, View):
* 1. it must be unique to this host.
* 2. it must be monotonically increasing.
*/
define(prepare, {
String, // Agent
String, // Master
Integer, // View Number
Integer // ARU
});
public
prepare(@Him, Me, View, Aru) :-
leader::leader(@Me, Leader, View),
paxos::parliament(@Me, Him),
local_aru(@Me, Aru),
Leader == Me;
/**** datalist block *********************/
/*
* When it receives a prepare message, an agent responds with a
* prepare_ok message. This must contain:
* 1. Any globally ordered updates with SeqNo > the ARU in the prepare message
* 2. Any proposed values the agent has accepted, for previous views,
* for which the same inequality holds.
*
* We eschew nested types, and combine the relevant attributes from the prepare
* message with both lists above.
*/
define(datalist, keys(0, 1, 2), {
String, // Agent
String, // Master
Integer, // View Number
Integer, // ARU requested
Integer, // Sequence Number
String, // Update
String // Type (Bottom, Ordered, Proposed)
});
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :-
global_history(Agent, SeqNo, _, Update),
datalist(Agent, Master, View, Aru, -1, _, "Bottom"),
SeqNo >= Aru,
Type := "Ordered";
public
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :-
accept(Agent, M, OldView, SeqNo, Update),
datalist(Agent, Master, View, Aru, -1, _, "Bottom"),
SeqNo >= Aru,
Type := "Proposed";
// if SeqNo < Aru, we ignore the preoposal
// Rather than using explicit negation, we insert a dummy
// tuple into datalist to represent "nothing".
datalist(Agent, Master, View, Aru, -1, "none", "Bottom") :-
prepare(Agent, Master, View, Aru),
leader::last_installed(Agent, LastView),
LastView == View;
define(datalist_length, keys(0, 1), {
String, // Agent
Integer, // ARU
Integer // count of datalist values
});
datalist_length(Agent, Aru, count<SeqNo>) :-
datalist(Agent, _, _, Aru, SeqNo, _, _);
/*********************************************/
/******* prepare_oklist block ****************/
define(prepare_oklist, keys(0, 1, 2, 4, 5, 6), {
String, // Master
Integer, // View number
Integer, // Sequence number
String, // Update
String, // Type
Integer, // Datalist length
String // Agent
});
prepare_oklist(@Master, View, SeqNo, Update, Type, Len, Agent) :-
datalist_length(@Agent, Aru, Len),
datalist(@Agent, Master, View, Aru, SeqNo, Update, Type);
delete
prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent) :-
prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent),
global_history(Master, SeqNo, _, Update);
define(prepare_oklist_cnt, keys(0, 1, 2), {
String, // Master
Integer, // View number
String, // Agent
Integer, // Datalist length
Integer // Count
});
prepare_oklist_cnt(Master, View, Agent, Len, count<SeqNo>) :-
prepare_oklist(Master, View, SeqNo, _, Type, Len, Agent);
define(prepare_ok_cnt, keys(0, 1), {
String, // Master
Integer, // View number
Integer // Agent Count
});
// only count agents who have sent prepare_ok messages towards
// the quorum if we're received the whole set of messages from
// that agent.
prepare_ok_cnt(Master, View, count<Agent>) :-
prepare_oklist_cnt(Master, View, Agent, Cnt, Cnt2),
Cnt == Cnt2;
/********************************************/
define(quorum, keys(0, 1), {
String, // Master
Integer // View
});
quorum(Master, View) :-
paxos::priestCnt(Master, PCnt),
leader::leader(Master, Leader, View),
Master == Leader,
prepare_ok_cnt(Master, View, RCnt),
RCnt > (PCnt / 2);
define(accept, keys(0, 1), {
String, // Agent
String, // Master
Integer, // View number
Integer, // Sequence number
String // Update
});
define(global_history, keys(0, 1), {
String, // Master
Integer, // Sequence number
String, // Requestor
String // Update
});
global_history(Agent, SeqNo, Requestor, Update) :-
prepare_oklist(Agent, View, SeqNo, Update, "Ordered", _, Requestor),
notin global_history(Agent, SeqNo);
|
