neilconway / overlog-paxos

A clean implementation of the Paxos consensus protocol in Overlog, a language for distributed computing.

commit 29: 7ba2c54468af
parent 28: 8e5f83fc7ce8
branch: default
tags: tip
comments for source and sink
Peter Alvaro
5 months ago
    #   Introduced
1
130c49fc932f
program paxos_prepare;
2
0a66742d2525
3
0a66742d2525
import java.lang.System;
4
0a66742d2525
5
0a66742d2525
// inserted into from the calling program.
6
0a66742d2525
define(start, keys(0), {String});
7
0a66742d2525
8
0a66742d2525
/**** local_aru block ******/
9
0a66742d2525
/*
10
0a66742d2525
    ARU, or "all received upto", holds the value of the next globally ordered
11
0a66742d2525
    update we are ready to propose or write.  It should be initialized from
12
0a66742d2525
    stable storage: below, we initialize it to zero because we have none.
13
0a66742d2525
14
0a66742d2525
    When we successfully update our global history, we may increment the ARU.
15
0a66742d2525
    we do this via an aggregation to force a stratification boundary.
16
0a66742d2525
*/
17
0a66742d2525
18
130c49fc932f
define(local_aru, keys(0), {
19
130c49fc932f
  String, // host
20
130c49fc932f
  Integer // local ARU value
21
130c49fc932f
});
22
130c49fc932f
23
0a66742d2525
public
24
0a66742d2525
local_aru(Me, 0) :-
25
0a66742d2525
    paxos::self(Me),
26
0a66742d2525
    start(),
27
0a66742d2525
    notin global_history(Me, _, _, _);
28
0a66742d2525
29
0a66742d2525
/**************************/
30
0a66742d2525
31
0a66742d2525
/*
32
0a66742d2525
 * When a node wins leader election, multicast a prepare message to
33
0a66742d2525
 * the Paxos group containing the new view number and our local ARU.
34
0a66742d2525
 * The common wisdom says that the prepare phase of Paxos enforces
35
0a66742d2525
 * all the safety guarantees: an unreliable leader can at worst impact
36
0a66742d2525
 * liveness.  This is not true in this implementation, because the 
37
0a66742d2525
 * leader election module produces our View number.  The following 
38
0a66742d2525
 * safety constraints must hold over leader::leader(_, _, View):
39
0a66742d2525
 *  1. it must be unique to this host.
40
0a66742d2525
 *  2. it must be monotonically increasing.
41
0a66742d2525
 */
42
0a66742d2525
43
130c49fc932f
define(prepare, {
44
130c49fc932f
  String,  // Agent
45
130c49fc932f
  String,  // Master
46
130c49fc932f
  Integer, // View Number
47
130c49fc932f
  Integer  // ARU
48
130c49fc932f
});
49
7895c7c0deba
50
0a66742d2525
public
51
0a66742d2525
prepare(@Him, Me, View, Aru) :-
52
0a66742d2525
    leader::leader(@Me, Leader, View),
53
0a66742d2525
    paxos::parliament(@Me, Him),
54
0a66742d2525
    local_aru(@Me, Aru),
55
0a66742d2525
    Leader == Me;
56
0a66742d2525
57
0a66742d2525
/**** datalist block *********************/
58
0a66742d2525
/*
59
0a66742d2525
 * When it receives a prepare message, an agent responds with a
60
0a66742d2525
 * prepare_ok message. This must contain:
61
0a66742d2525
 *      1. Any globally ordered updates with SeqNo > the ARU in the prepare message
62
0a66742d2525
 *      2. Any proposed values the agent has accepted, for previous views, 
63
0a66742d2525
 *         for which the same inequality holds.
64
0a66742d2525
 *
65
0a66742d2525
 * We eschew nested types, and combine the relevant attributes from the prepare
66
0a66742d2525
 * message with both lists above.
67
0a66742d2525
 */
68
130c49fc932f
define(datalist, keys(0, 1, 2), {
69
130c49fc932f
  String,   // Agent
70
130c49fc932f
  String,   // Master
71
130c49fc932f
  Integer,  // View Number
72
130c49fc932f
  Integer,  // ARU requested
73
130c49fc932f
  Integer,  // Sequence Number
74
130c49fc932f
  String,   // Update
75
130c49fc932f
  String    // Type (Bottom, Ordered, Proposed)
76
130c49fc932f
});
77
130c49fc932f
78
0a66742d2525
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :-
79
0a66742d2525
    global_history(Agent, SeqNo, _, Update),
80
0a66742d2525
    datalist(Agent, Master, View, Aru, -1, _, "Bottom"),
81
0a66742d2525
    SeqNo >= Aru,
82
0a66742d2525
    Type := "Ordered";
83
0a66742d2525
84
0a66742d2525
public
85
0a66742d2525
datalist(Agent, Master, View, Aru, SeqNo, Update, Type) :-
86
0a66742d2525
    accept(Agent, M, OldView, SeqNo, Update),
87
0a66742d2525
    datalist(Agent, Master, View, Aru, -1, _, "Bottom"),
88
d9c9e6e60ee9
    SeqNo >= Aru,
89
0a66742d2525
    Type := "Proposed";
90
0a66742d2525
91
0a66742d2525
// if SeqNo < Aru, we ignore the preoposal
92
0a66742d2525
93
0a66742d2525
// Rather than using explicit negation, we insert a dummy
94
0a66742d2525
// tuple into datalist to represent "nothing".
95
0a66742d2525
datalist(Agent, Master, View, Aru, -1, "none", "Bottom") :-
96
0a66742d2525
    prepare(Agent, Master, View, Aru),
97
0a66742d2525
    leader::last_installed(Agent, LastView),
98
0a66742d2525
    LastView == View;
99
0a66742d2525
100
130c49fc932f
define(datalist_length, keys(0, 1), {
101
130c49fc932f
  String,   // Agent
102
130c49fc932f
  Integer,  // ARU
103
130c49fc932f
  Integer   // count of datalist values
104
130c49fc932f
});
105
7895c7c0deba
106
0a66742d2525
datalist_length(Agent, Aru, count<SeqNo>) :-
107
0a66742d2525
    datalist(Agent, _, _, Aru, SeqNo, _, _);
108
0a66742d2525
/*********************************************/
109
0a66742d2525
110
0a66742d2525
111
0a66742d2525
112
0a66742d2525
/******* prepare_oklist block ****************/
113
130c49fc932f
define(prepare_oklist, keys(0, 1, 2, 4, 5, 6), {
114
130c49fc932f
  String,   // Master
115
130c49fc932f
  Integer,  // View number
116
130c49fc932f
  Integer,  // Sequence number
117
130c49fc932f
  String,   // Update
118
130c49fc932f
  String,   // Type
119
130c49fc932f
  Integer,  // Datalist length
120
130c49fc932f
  String    // Agent
121
130c49fc932f
});
122
0a66742d2525
123
0a66742d2525
prepare_oklist(@Master, View, SeqNo, Update, Type, Len, Agent) :-
124
0a66742d2525
    datalist_length(@Agent, Aru, Len),
125
0a66742d2525
    datalist(@Agent, Master, View, Aru, SeqNo, Update, Type);
126
0a66742d2525
127
0a66742d2525
128
0a66742d2525
delete
129
0a66742d2525
prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent) :-
130
0a66742d2525
    prepare_oklist(Master, View, SeqNo, Update, Type, Len, Agent),
131
0a66742d2525
    global_history(Master, SeqNo, _, Update);
132
0a66742d2525
133
130c49fc932f
define(prepare_oklist_cnt, keys(0, 1, 2), {
134
130c49fc932f
  String,   // Master
135
130c49fc932f
  Integer,  // View number
136
130c49fc932f
  String,   // Agent
137
130c49fc932f
  Integer,  // Datalist length
138
130c49fc932f
  Integer   // Count
139
130c49fc932f
});
140
130c49fc932f
141
0a66742d2525
prepare_oklist_cnt(Master, View, Agent, Len, count<SeqNo>) :-
142
0a66742d2525
    prepare_oklist(Master, View, SeqNo, _, Type, Len, Agent);
143
0a66742d2525
144
130c49fc932f
define(prepare_ok_cnt, keys(0, 1), {
145
130c49fc932f
  String,   // Master
146
130c49fc932f
  Integer,  // View number
147
130c49fc932f
  Integer   // Agent Count
148
130c49fc932f
});
149
0a66742d2525
150
130c49fc932f
// only count agents who have sent prepare_ok messages towards
151
130c49fc932f
// the quorum if we're received the whole set of messages from
152
130c49fc932f
// that agent.
153
0a66742d2525
prepare_ok_cnt(Master, View, count<Agent>) :-
154
0a66742d2525
    prepare_oklist_cnt(Master, View, Agent, Cnt, Cnt2),
155
0a66742d2525
    Cnt == Cnt2;
156
0a66742d2525
/********************************************/
157
0a66742d2525
158
0a66742d2525
159
130c49fc932f
define(quorum, keys(0, 1), {
160
130c49fc932f
  String,   // Master
161
130c49fc932f
  Integer   // View
162
130c49fc932f
});
163
130c49fc932f
164
0a66742d2525
quorum(Master, View) :-
165
0a66742d2525
    paxos::priestCnt(Master, PCnt),
166
0a66742d2525
    leader::leader(Master, Leader, View),
167
0a66742d2525
    Master == Leader,
168
0a66742d2525
    prepare_ok_cnt(Master, View, RCnt),
169
0a66742d2525
    RCnt > (PCnt / 2);
170
0a66742d2525
171
130c49fc932f
define(accept, keys(0, 1), {
172
130c49fc932f
  String,   // Agent
173
130c49fc932f
  String,   // Master
174
130c49fc932f
  Integer,  // View number
175
130c49fc932f
  Integer,  // Sequence number
176
130c49fc932f
  String    // Update
177
130c49fc932f
});
178
0a66742d2525
179
130c49fc932f
define(global_history, keys(0, 1), {
180
130c49fc932f
  String,   // Master
181
130c49fc932f
  Integer,  // Sequence number
182
130c49fc932f
  String,   // Requestor
183
130c49fc932f
  String    // Update
184
130c49fc932f
});
185
130c49fc932f
186
0a66742d2525
global_history(Agent, SeqNo, Requestor, Update) :-
187
0a66742d2525
    prepare_oklist(Agent, View, SeqNo, Update, "Ordered", _, Requestor),
188
0a66742d2525
    notin global_history(Agent, SeqNo);