neilconway / overlog-paxos
A clean implementation of the Paxos consensus protocol in Overlog, a language for distributed computing.
Clone this repository (size: 4.5 MB): HTTPS / SSH
$ hg clone http://bitbucket.org/neilconway/overlog-paxos/
| commit 29: | 7ba2c54468af |
| parent 28: | 8e5f83fc7ce8 |
| branch: | default |
| tags: | tip |
comments for source and sink
5 months ago
| # Introduced |
|---|
1 |
130c49fc932f |
program paxos_prepare; |
2 |
0a66742d2525 |
|
3 |
0a66742d2525 |
import java.lang.System; |
4 |
0a66742d2525 |
|
5 |
0a66742d2525 |
// inserted into from the calling program. |
6 |
0a66742d2525 |
define(start, keys(0), {String}); |
7 |
0a66742d2525 |
|
8 |
0a66742d2525 |
/**** local_aru block ******/ |
9 |
0a66742d2525 |
/* |
10 |
0a66742d2525 |
ARU, or "all received upto", holds the value of the next globally ordered |
11 |
0a66742d2525 |
update we are ready to propose or write. It should be initialized from |
12 |
0a66742d2525 |
stable storage: below, we initialize it to zero because we have none. |
13 |
0a66742d2525 |
|
14 |
0a66742d2525 |
When we successfully update our global history, we may increment the ARU. |
15 |
0a66742d2525 |
we do this via an aggregation to force a stratification boundary. |
16 |
0a66742d2525 |
*/ |
17 |
0a66742d2525 |
|
18 |
130c49fc932f |
define(local_aru, keys(0), { |
19 |
130c49fc932f |
String, // host |
20 |
130c49fc932f |
Integer // local ARU value |
21 |
130c49fc932f |
}); |
22 |
130c49fc932f |
|
23 |
0a66742d2525 |
public |
24 |
0a66742d2525 |
local_aru(Me, 0) :- |
25 |
0a66742d2525 |
paxos::self(Me), |
26 |
0a66742d2525 |
start(), |
27 |
0a66742d2525 |
notin global_history(Me, _, _, _); |
28 |
0a66742d2525 |
|
29 |
0a66742d2525 |
/**************************/ |
30 |
0a66742d2525 |
|
31 |
0a66742d2525 |
/* |
32 |
0a66742d2525 |
* When a node wins leader election, multicast a prepare message to |
33 |
0a66742d2525 |
* the Paxos group containing the new view number and our local ARU. |
34 |
0a66742d2525 |
* The common wisdom says that the prepare phase of Paxos enforces |
35 |
0a66742d2525 |
* all the safety guarantees: an unreliable leader can at worst impact |
36 |
0a66742d2525 |
* liveness. This is not true in this implementation, because the |
37 |
0a66742d2525 |
* leader election module produces our View number. The following |
38 |
0a66742d2525 |
* safety constraints must hold over leader::leader(_, _, View): |
39 |
0a66742d2525 |
* 1. it must be unique to this host. |
40 |
0a66742d2525 |
* 2. it must be monotonically increasing. |
41 |
0a66742d2525 |
*/ |
42 |
0a66742d2525 |
|
43 |
130c49fc932f |
define(prepare, { |
44 |
130c49fc932f |
String, // Agent |
45 |
130c49fc932f |
String, // Master |
46 |
130c49fc932f |
Integer, // View Number |
47 |
130c49fc932f |
Integer // ARU |
48 |
130c49fc932f |
}); |
49 |
7895c7c0deba |
|
50 |
0a66742d2525 |
public |
51 |
0a66742d2525 |
prepare(@Him, Me, View, Aru) :- |
52 |
0a66742d2525 |
leader::leader(@Me, Leader, View), |
53 |
0a66742d2525 |
paxos::parliament(@Me, Him), |
54 |
0a66742d2525 |
local_aru(@Me, Aru), |
55 |
0a66742d2525 |
Leader == Me; |
56 |
0a66742d2525 |
|
57 |
0a66742d2525 |
/**** datalist block *********************/ |
58 |
0a66742d2525 |
/* |
59 |
0a66742d2525 |
* When it receives a prepare message, an agent responds with a |
60 |
0a66742d2525 |
* prepare_ok message. This must contain: |
61 |
0a66742d2525 |
* 1. Any globally ordered updates with SeqNo > the ARU in the prepare message |
62 |
0a66742d2525 |
* 2. Any proposed values the agent has accepted, for previous views, |
63 |
0a66742d2525 |
* for which the same inequality holds. |
64 |
0a66742d2525 |
* |
65 |
0a66742d2525 |
* We eschew nested types, and combine the relevant attributes from the prepare |
66 |
0a66742d2525 |
* message with both lists above. |
67 |
0a66742d2525 |
*/ |
68 |
130c49fc932f |
define(datalist, keys(0, 1, 2), { |
69 |
130c49fc932f |
String, // Agent |
70 |
130c49fc932f |
String, // Master |
71 |
130c49fc932f |
Integer, // View Number |
72 |
130c49fc932f |
Integer, // ARU requested |
73 |
130c49fc932f |
Integer, // Sequence Number |
74 |
130c49fc932f |
String, // Update |
75 |
130c49fc932f |
String // Type (Bottom, Ordered, Proposed) |
76 |
130c49fc932f |
}); |
77 |
130c49fc932f |
|
78 |
0a66742d2525 |
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :- |
79 |
0a66742d2525 |
global_history(Agent, SeqNo, _, Update), |
80 |
0a66742d2525 |
datalist(Agent, Master, View, Aru, -1, _, "Bottom"), |
81 |
0a66742d2525 |
SeqNo >= Aru, |
82 |
0a66742d2525 |
Type := "Ordered"; |
83 |
0a66742d2525 |
|
84 |
0a66742d2525 |
public |
85 |
0a66742d2525 |
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :- |
86 |
0a66742d2525 |
accept(Agent, M, OldView, SeqNo, Update), |
87 |
0a66742d2525 |
datalist(Agent, Master, View, Aru, -1, _, "Bottom"), |
88 |
d9c9e6e60ee9 |
SeqNo >= Aru, |
89 |
0a66742d2525 |
Type := "Proposed"; |
90 |
0a66742d2525 |
|
91 |
0a66742d2525 |
// if SeqNo < Aru, we ignore the preoposal |
92 |
0a66742d2525 |
|
93 |
0a66742d2525 |
// Rather than using explicit negation, we insert a dummy |
94 |
0a66742d2525 |
// tuple into datalist to represent "nothing". |
95 |
0a66742d2525 |
datalist(Agent, Master, View, Aru, -1, "none", "Bottom") :- |
96 |
0a66742d2525 |
prepare(Agent, Master, View, Aru), |
97 |
0a66742d2525 |
leader::last_installed(Agent, LastView), |
98 |
0a66742d2525 |
LastView == View; |
99 |
0a66742d2525 |
|
100 |
130c49fc932f |
define(datalist_length, keys(0, 1), { |
101 |
130c49fc932f |
String, // Agent |
102 |
130c49fc932f |
Integer, // ARU |
103 |
130c49fc932f |
Integer // count of datalist values |
104 |
130c49fc932f |
}); |
105 |
7895c7c0deba |
|
106 |
0a66742d2525 |
datalist_length(Agent, Aru, count<SeqNo>) :- |
107 |
0a66742d2525 |
datalist(Agent, _, _, Aru, SeqNo, _, _); |
108 |
0a66742d2525 |
/*********************************************/ |
109 |
0a66742d2525 |
|
110 |
0a66742d2525 |
|
111 |
0a66742d2525 |
|
112 |
0a66742d2525 |
/******* prepare_oklist block ****************/ |
113 |
130c49fc932f |
define(prepare_oklist, keys(0, 1, 2, 4, 5, 6), { |
114 |
130c49fc932f |
String, // Master |
115 |
130c49fc932f |
Integer, // View number |
116 |
130c49fc932f |
Integer, // Sequence number |
117 |
130c49fc932f |
String, // Update |
118 |
130c49fc932f |
String, // Type |
119 |
130c49fc932f |
Integer, // Datalist length |
120 |
130c49fc932f |
String // Agent |
121 |
130c49fc932f |
}); |
122 |
0a66742d2525 |
|
123 |
0a66742d2525 |
prepare_oklist(@Master, View, SeqNo, Update, Type, Len, Agent) :- |
124 |
0a66742d2525 |
datalist_length(@Agent, Aru, Len), |
125 |
0a66742d2525 |
datalist(@Agent, Master, View, Aru, SeqNo, Update, Type); |
126 |
0a66742d2525 |
|
127 |
0a66742d2525 |
|
128 |
0a66742d2525 |
delete |
129 |
0a66742d2525 |
prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent) :- |
130 |
0a66742d2525 |
prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent), |
131 |
0a66742d2525 |
global_history(Master, SeqNo, _, Update); |
132 |
0a66742d2525 |
|
133 |
130c49fc932f |
define(prepare_oklist_cnt, keys(0, 1, 2), { |
134 |
130c49fc932f |
String, // Master |
135 |
130c49fc932f |
Integer, // View number |
136 |
130c49fc932f |
String, // Agent |
137 |
130c49fc932f |
Integer, // Datalist length |
138 |
130c49fc932f |
Integer // Count |
139 |
130c49fc932f |
}); |
140 |
130c49fc932f |
|
141 |
0a66742d2525 |
prepare_oklist_cnt(Master, View, Agent, Len, count<SeqNo>) :- |
142 |
0a66742d2525 |
prepare_oklist(Master, View, SeqNo, _, Type, Len, Agent); |
143 |
0a66742d2525 |
|
144 |
130c49fc932f |
define(prepare_ok_cnt, keys(0, 1), { |
145 |
130c49fc932f |
String, // Master |
146 |
130c49fc932f |
Integer, // View number |
147 |
130c49fc932f |
Integer // Agent Count |
148 |
130c49fc932f |
}); |
149 |
0a66742d2525 |
|
150 |
130c49fc932f |
// only count agents who have sent prepare_ok messages towards |
151 |
130c49fc932f |
// the quorum if we're received the whole set of messages from |
152 |
130c49fc932f |
// that agent. |
153 |
0a66742d2525 |
prepare_ok_cnt(Master, View, count<Agent>) :- |
154 |
0a66742d2525 |
prepare_oklist_cnt(Master, View, Agent, Cnt, Cnt2), |
155 |
0a66742d2525 |
Cnt == Cnt2; |
156 |
0a66742d2525 |
/********************************************/ |
157 |
0a66742d2525 |
|
158 |
0a66742d2525 |
|
159 |
130c49fc932f |
define(quorum, keys(0, 1), { |
160 |
130c49fc932f |
String, // Master |
161 |
130c49fc932f |
Integer // View |
162 |
130c49fc932f |
}); |
163 |
130c49fc932f |
|
164 |
0a66742d2525 |
quorum(Master, View) :- |
165 |
0a66742d2525 |
paxos::priestCnt(Master, PCnt), |
166 |
0a66742d2525 |
leader::leader(Master, Leader, View), |
167 |
0a66742d2525 |
Master == Leader, |
168 |
0a66742d2525 |
prepare_ok_cnt(Master, View, RCnt), |
169 |
0a66742d2525 |
RCnt > (PCnt / 2); |
170 |
0a66742d2525 |
|
171 |
130c49fc932f |
define(accept, keys(0, 1), { |
172 |
130c49fc932f |
String, // Agent |
173 |
130c49fc932f |
String, // Master |
174 |
130c49fc932f |
Integer, // View number |
175 |
130c49fc932f |
Integer, // Sequence number |
176 |
130c49fc932f |
String // Update |
177 |
130c49fc932f |
}); |
178 |
0a66742d2525 |
|
179 |
130c49fc932f |
define(global_history, keys(0, 1), { |
180 |
130c49fc932f |
String, // Master |
181 |
130c49fc932f |
Integer, // Sequence number |
182 |
130c49fc932f |
String, // Requestor |
183 |
130c49fc932f |
String // Update |
184 |
130c49fc932f |
}); |
185 |
130c49fc932f |
|
186 |
0a66742d2525 |
global_history(Agent, SeqNo, Requestor, Update) :- |
187 |
0a66742d2525 |
prepare_oklist(Agent, View, SeqNo, Update, "Ordered", _, Requestor), |
188 |
0a66742d2525 |
notin global_history(Agent, SeqNo); |
