#include "hmbdc/Copyright.hpp"
#include "hmbdc/pattern/LockFreeBufferMisc.hpp"
#include "hmbdc/Exception.hpp"
#include "hmbdc/Compile.hpp"

#include <boost/smart_ptr/detail/yield_k.hpp>

#include <algorithm>
#include <atomic>
#include <cstring>
#include <limits>
#include <mutex>
#include <thread>
#include <vector>

#define HMBDC_YIELD(x) boost::detail::yield(x)

namespace hmbdc {
namespace pattern {

namespace memringbuffer_detail {
// simple spin lock built on std::atomic_flag; used to serialize reentrant takes
struct my_spin_lock : std::atomic_flag {
    my_spin_lock()
    : std::atomic_flag{0} {
        // the {0} init above is only valid if ATOMIC_FLAG_INIT is all zero
        constexpr int f[] = ATOMIC_FLAG_INIT;
        static_assert(0 == f[0], "ATOMIC_FLAG_INIT is expected to be zero");
    }

    void lock() {
        while (test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
    }

    void unlock() {
        clear(std::memory_order_release);
    }
};
template <uint16_t parallel_consumer_count, typename SeqT = size_t>
struct MemRingBuffer {
    using Sequence = SeqT;
    enum {PARALLEL_CONSUMER_COUNT = parallel_consumer_count,};

    const size_t CAPACITY;
    const size_t VALUE_TYPE_SIZE;
    const Sequence READ_SEQ_MAX;    // a read sequence at or above this marks the consumer dead
    const size_t MASK;              // == CAPACITY - 1; CAPACITY is always a power of two

    Sequence toBeClaimedSeq_
        __attribute__((__aligned__(SMP_CACHE_BYTES)));
    Sequence readSeq_[PARALLEL_CONSUMER_COUNT]
        __attribute__((__aligned__(SMP_CACHE_BYTES)));
    Sequence readSeqLastPurge_[PARALLEL_CONSUMER_COUNT]
        __attribute__((__aligned__(SMP_CACHE_BYTES)));
    // (the declarations of the underlying ring storage buffer_ and the per-consumer
    // spin locks locks_ are elided in this listing)
    // smallest read sequence across all consumers; writers may not run more than
    // CAPACITY slots ahead of this point
    inline __attribute__ ((always_inline))
    Sequence readSeqLow() const HMBDC_RESTRICT {
        Sequence res = readSeq_[0];
        for (uint16_t i = 1;
            i < PARALLEL_CONSUMER_COUNT; ++i)
            if (res > readSeq_[i]) res = readSeq_[i];
        return res;
    }
    // index of the consumer with the smallest (slowest) read sequence
    inline __attribute__ ((always_inline))
    uint16_t findSlowestReader() const HMBDC_RESTRICT {
        Sequence smallest = readSeq_[0];
        uint16_t smallestLoc = 0;
        for (uint16_t i = 1; i < PARALLEL_CONSUMER_COUNT; ++i)
            if (smallest > readSeq_[i]) {
                smallest = readSeq_[i];
                smallestLoc = i;
            }
        return smallestLoc;
    }

    using value_type = void *;
    // bytes needed to host a ring with 2^valueTypeSizePower2Num-byte slots and
    // 2^ringSizePower2Num slots: the storage's own footprint plus cache-line padding
    static size_t footprint(uint32_t valueTypeSizePower2Num, uint32_t ringSizePower2Num) {
        return decltype(buffer_)::footprint(
            valueTypeSizePower2Num, ringSizePower2Num) + SMP_CACHE_BYTES;
    }
    template <typename Allocator = os::DefaultAllocator>
    MemRingBuffer(uint32_t valueTypeSizePower2Num, uint32_t ringSizePower2Num
        , Allocator& allocator = os::DefaultAllocator::instance())
    : CAPACITY(1u << ringSizePower2Num)
    , VALUE_TYPE_SIZE((1u << valueTypeSizePower2Num) - sizeof(Sequence))
    , READ_SEQ_MAX(std::numeric_limits<Sequence>::max() - CAPACITY - 1000u)
    , MASK(CAPACITY - 1)
    , buffer_(valueTypeSizePower2Num, ringSizePower2Num, allocator)
    , toBeClaimedSeq_(0u) {
        std::fill_n(readSeq_, (int)PARALLEL_CONSUMER_COUNT, 0);
        std::fill_n(readSeqLastPurge_, (int)PARALLEL_CONSUMER_COUNT, READ_SEQ_MAX);
        // every slot starts with an impossible sequence number so readers never see stale data
        for (auto i = CAPACITY; i != 0 ; --i) {
            *buffer_.getSeq(i - 1) = std::numeric_limits<Sequence>::max();
        }
    }
    // copy an item into the next slot; spins until room is available
    void put(void const* HMBDC_RESTRICT item, size_t sizeHint = 0) HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, 1, __ATOMIC_RELAXED);
        for (unsigned k = 0;
            seq >= CAPACITY + readSeqLow();
            ++k) {
            HMBDC_YIELD(k);
        }
        size_t index = seq & MASK;
        memcpy(buffer_ + index
            , item, sizeHint ? sizeHint : VALUE_TYPE_SIZE);
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        *buffer_.getSeq(index) = seq;
    }
    // copy an item in only if a slot is free right now; returns false instead of blocking
    bool tryPut(void const* HMBDC_RESTRICT item, size_t sizeHint = 0) HMBDC_RESTRICT {
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        for (auto seq = toBeClaimedSeq_;
            seq < CAPACITY + readSeqLow();
            seq = toBeClaimedSeq_) {
            if (hmbdc_likely(__atomic_compare_exchange_n(
                &toBeClaimedSeq_, &seq, seq + 1, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
                size_t index = seq & MASK;
                memcpy(buffer_ + index
                    , item, sizeHint ? sizeHint : VALUE_TYPE_SIZE);
                __atomic_thread_fence(__ATOMIC_ACQUIRE);
                *buffer_.getSeq(index) = seq;
                return true;
            }
        }
        return false;
    }
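    // Usage sketch (illustrative only, not part of the original header; Msg, rb and the
    // drop counter are assumptions): a producer that refuses to block on slow consumers.
    //
    //     Msg m;
    //     if (!rb.tryPut(&m, sizeof(m))) {
    //         ++droppedCount;            // buffer full right now; drop (or retry later)
    //     }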
    // like put(), but instead of waiting for a slow consumer it marks the slowest one dead
    void killPut(void const* HMBDC_RESTRICT item, size_t sizeHint = 0) HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, 1, __ATOMIC_RELAXED);
        while (seq >= CAPACITY + readSeqLow()) {
            uint16_t slowLoc = findSlowestReader();
            markDead(slowLoc);
        }
        size_t index = seq & MASK;
        memcpy(buffer_ + index, item, sizeHint ? sizeHint : VALUE_TYPE_SIZE);
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        *buffer_.getSeq(index) = seq;
    }
    bool isFull() const {
        return toBeClaimedSeq_ >= CAPACITY + readSeqLow();
    }
    Sequence readSeq(uint16_t PARALLEL_CONSUMER_INDEX) const HMBDC_RESTRICT {
        return readSeq_[PARALLEL_CONSUMER_INDEX];
    }
    // claim one slot for in-place write; spins until room is available
    iterator claim() HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, 1, __ATOMIC_RELAXED);
        for (unsigned k = 0;
            seq >= CAPACITY + readSeqLow();
            ++k) {
            HMBDC_YIELD(k);
        }
        return iterator(buffer_, seq);
    }
    // claim one slot for write; returns an empty iterator instead of blocking
    iterator tryClaim() HMBDC_RESTRICT {
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        for (auto seq = toBeClaimedSeq_;
            seq < CAPACITY + readSeqLow();
            seq = toBeClaimedSeq_) {
            if (hmbdc_likely(__atomic_compare_exchange_n(
                &toBeClaimedSeq_, &seq, seq + 1, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
                return iterator(buffer_, seq);
            }
        }
        return iterator();
    }
    // claim n slots in the ring buffer for write; returns an empty iterator when not possible
    iterator tryClaim(size_t n) HMBDC_RESTRICT {
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        for (auto seq = toBeClaimedSeq_;
            seq + n - 1 < CAPACITY + readSeqLow();
            seq = toBeClaimedSeq_) {
            if (hmbdc_likely(__atomic_compare_exchange_n(
                &toBeClaimedSeq_, &seq, seq + n, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED))) {
                return iterator(buffer_, seq);
            }
        }
        return iterator();
    }
    // claim n consecutive slots for write; spins until room is available
    iterator claim(size_t n) HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, n, __ATOMIC_RELAXED);
        for (unsigned k = 0;
            seq + n > CAPACITY + readSeqLow();
            ++k) {
            HMBDC_YIELD(k);
        }
        return iterator(buffer_, seq);
    }
    // like claim(), but marks the slowest consumer dead rather than waiting for it
    iterator killClaim() HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, 1, __ATOMIC_RELAXED);
        while (seq >= CAPACITY + readSeqLow()) {
            uint16_t slowLoc = findSlowestReader();
            markDead(slowLoc);
        }
        return iterator(buffer_, seq);
    }
    iterator killClaim(size_t n) HMBDC_RESTRICT {
        Sequence seq = __atomic_fetch_add(&toBeClaimedSeq_, n, __ATOMIC_RELAXED);
        while (seq + n > CAPACITY + readSeqLow()) {
            uint16_t slowLoc = findSlowestReader();
            markDead(slowLoc);
        }
        return iterator(buffer_, seq);
    }
    // publish a previously claimed slot so consumers can read it
    void commit(iterator it) HMBDC_RESTRICT {
        __sync_synchronize();
        *buffer_.getSeq(*it - buffer_) = it.seq_;
    }
    // publish n previously claimed slots, starting at 'from'
    void commit(iterator from, size_t n) HMBDC_RESTRICT {
        __sync_synchronize();
        for (size_t i = 0; i < n; ++i) {
            *buffer_.getSeq(*from - buffer_) = from.seq_;
            ++from;
        }
    }
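    // Usage sketch for batched writes (illustrative only, not part of the original header;
    // rb, Msg and msg[] are assumptions, and the iterator is assumed to be copyable, to
    // advance one slot per ++, and to dereference to the slot's raw address, as its use
    // in commit() above suggests):
    //
    //     auto it = rb.claim(3);                    // claim 3 slots (may spin until room exists)
    //     auto w = it;
    //     for (size_t i = 0; i < 3; ++i, ++w) {
    //         ::memcpy(*w, &msg[i], sizeof(Msg));   // fill each claimed slot in place
    //     }
    //     rb.commit(it, 3);                         // publish all 3 slots to every consumer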
    // permanently retire a consumer slot so it no longer holds back the writers
    void markDead(uint16_t parallel_consumer_index) HMBDC_RESTRICT {
        if (parallel_consumer_index < PARALLEL_CONSUMER_COUNT) {
            readSeq_[parallel_consumer_index] = READ_SEQ_MAX;
            __sync_synchronize();
        }
    }
    // indexes of consumer slots that are currently dead / unused
    std::vector<uint16_t> unusedConsumerIndexes() const {
        std::vector<uint16_t> res;
        for (uint16_t i = 0; i < PARALLEL_CONSUMER_COUNT; ++i) {
            if (readSeq_[i] == READ_SEQ_MAX) {
                res.push_back(i);
            }
        }
        return res;
    }
    // copy the next item for this consumer into 'item'; spins until one is published
    void take(uint16_t PARALLEL_CONSUMER_INDEX, void* HMBDC_RESTRICT item
        , size_t sizeHint = 0) HMBDC_RESTRICT {
        auto seq = readSeq_[PARALLEL_CONSUMER_INDEX];
        if (hmbdc_unlikely(seq >= READ_SEQ_MAX)) {
            HMBDC_THROW(DeadConsumer, PARALLEL_CONSUMER_INDEX);
        }
        size_t index = seq & MASK;
        for (unsigned k = 0;
            seq != *buffer_.getSeq(index);
            ++k) {
            HMBDC_YIELD(k);
        }
        memcpy(item, buffer_ + index, sizeHint ? sizeHint : VALUE_TYPE_SIZE);
        __atomic_fetch_add(readSeq_ + PARALLEL_CONSUMER_INDEX, 1, __ATOMIC_RELEASE);
    }
    // same as take(), but safe to call from multiple threads sharing one consumer index
    void takeReentrant(uint16_t PARALLEL_CONSUMER_INDEX, void* HMBDC_RESTRICT item
        , size_t sizeHint = 0) HMBDC_RESTRICT {
        std::lock_guard<my_spin_lock> guard(locks_[PARALLEL_CONSUMER_INDEX]);
        take(PARALLEL_CONSUMER_INDEX, item, sizeHint);
    }
    // non-blocking look at the next item; returns an empty iterator if nothing is published yet
    iterator peek(uint16_t PARALLEL_CONSUMER_INDEX) const HMBDC_RESTRICT {
        auto readSeq = readSeq_[PARALLEL_CONSUMER_INDEX];
        if (hmbdc_unlikely(readSeq >= READ_SEQ_MAX)) {
            HMBDC_THROW(DeadConsumer, PARALLEL_CONSUMER_INDEX);
        }
        if (readSeq == *buffer_.getSeq(readSeq & MASK)) {
            return iterator(buffer_, readSeq);
        }
        return iterator();
    }
    // non-blocking: set [begin, end) over everything currently published for this consumer
    // (up to maxPeekSize items) and return how many items that is
    size_t peek(uint16_t PARALLEL_CONSUMER_INDEX, iterator& begin, iterator& end
        , size_t maxPeekSize = std::numeric_limits<size_t>::max()) const {
        Sequence readSeq = readSeq_[PARALLEL_CONSUMER_INDEX];
        if (hmbdc_unlikely(readSeq >= READ_SEQ_MAX)) {
            HMBDC_THROW(DeadConsumer, PARALLEL_CONSUMER_INDEX);
        }
        begin = iterator(buffer_, readSeq);
        while (readSeq == *buffer_.getSeq(readSeq & MASK)
            && maxPeekSize--) {
            ++readSeq;
        }
        end = iterator(buffer_, readSeq);
        return readSeq - readSeq_[PARALLEL_CONSUMER_INDEX];
    }
    // like peek(), but yields until at least one item is visible
    size_t peekSome(uint16_t PARALLEL_CONSUMER_INDEX, iterator& begin, iterator& end
        , size_t maxPeekSize = std::numeric_limits<size_t>::max()) const {
        size_t res = 0;
        for (unsigned k = 0;
            !(res = peek(PARALLEL_CONSUMER_INDEX, begin, end, maxPeekSize));
            ++k) {
            HMBDC_YIELD(k);
        }
        return res;
    }
    // drop the next 'size' items for this consumer without copying them out;
    // waits until that many items have at least been claimed by writers
    void waste(uint16_t PARALLEL_CONSUMER_INDEX, size_t size) HMBDC_RESTRICT {
        Sequence seq = readSeq_[PARALLEL_CONSUMER_INDEX];
        if (hmbdc_unlikely(seq >= READ_SEQ_MAX)) {
            HMBDC_THROW(DeadConsumer, PARALLEL_CONSUMER_INDEX);
        }
        for (unsigned k = 0;
            seq + size > toBeClaimedSeq_;
            ++k) {
            HMBDC_YIELD(k);
        }
        __atomic_fetch_add(readSeq_ + PARALLEL_CONSUMER_INDEX, size, __ATOMIC_RELEASE);
    }
    // for batched processing: mark the next 'size' items (already seen via peek) consumed
    void wasteAfterPeek(uint16_t PARALLEL_CONSUMER_INDEX, size_t size) HMBDC_RESTRICT {
        __atomic_fetch_add(readSeq_ + PARALLEL_CONSUMER_INDEX, size, __ATOMIC_RELEASE);
    }
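    // Usage sketch for batched reads (illustrative only, not part of the original header;
    // rb, handle() and consumer index 0 are assumptions, and the iterator is assumed to
    // support != and ++ over the peeked range):
    //
    //     MemRingBuffer<2>::iterator begin, end;
    //     if (size_t n = rb.peekSome(0, begin, end)) {   // grab whatever is already published
    //         for (auto i = begin; i != end; ++i) {
    //             handle(*i);                            // *i points at the slot's bytes
    //         }
    //         rb.wasteAfterPeek(0, n);                   // mark those n items consumed
    //     }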
    // make one consumer's read position jump to another consumer's current position
    Sequence catchUpWith(uint16_t PARALLEL_CONSUMER_INDEX, uint16_t WITH_PARALLEL_CONSUMER_INDEX) {
        readSeq_[PARALLEL_CONSUMER_INDEX] = readSeq_[WITH_PARALLEL_CONSUMER_INDEX];
        __atomic_thread_fence(__ATOMIC_ACQ_REL);
        return readSeq_[PARALLEL_CONSUMER_INDEX];
    }
    // advance a consumer's read position to newSeq (never moves it backwards)
    void catchUpTo(uint16_t PARALLEL_CONSUMER_INDEX, Sequence newSeq) {
        if (readSeq_[PARALLEL_CONSUMER_INDEX] <= newSeq) {
            readSeq_[PARALLEL_CONSUMER_INDEX] = newSeq;
        }
        __atomic_thread_fence(__ATOMIC_RELEASE);
    }
    // items claimed by writers but not yet consumed by this consumer
    size_t remainingSize(uint16_t index) const HMBDC_RESTRICT {
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        Sequence r = readSeq_[index];
        Sequence w = toBeClaimedSeq_;
        return w > r ? w - r : 0;
    }
    // items not yet consumed by the slowest consumer
    size_t remainingSize() const HMBDC_RESTRICT {
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        Sequence r = readSeqLow();
        Sequence w = toBeClaimedSeq_;
        return w > r ? w - r : 0;
    }
    // move a consumer's read position to the current write position, skipping everything pending
    void reset(uint16_t PARALLEL_CONSUMER_INDEX) {
        size_t index;
        do {
            readSeq_[PARALLEL_CONSUMER_INDEX] = toBeClaimedSeq_;
            index = readSeq_[PARALLEL_CONSUMER_INDEX] & MASK;
            __atomic_thread_fence(__ATOMIC_ACQ_REL);
        } while (*buffer_.getSeq(index) == readSeq_[PARALLEL_CONSUMER_INDEX]);
    }
    // how many consumer slots are still live (not marked dead)
    size_t parallelConsumerAlive() const {
        return std::count_if(readSeq_, readSeq_ + PARALLEL_CONSUMER_COUNT
            , [m = READ_SEQ_MAX](Sequence s) {
                return s < m;
            });
    }
    // call this periodically to mark the stuck consumers dead; returns a bit mask of the
    // consumer indexes that were killed
    uint64_t purge() {
        __sync_synchronize();
        uint64_t res = 0;
        for (uint16_t i = 0; i < PARALLEL_CONSUMER_COUNT; ++i) {
            auto seq = readSeq_[i];
            if (seq < READ_SEQ_MAX) {
                size_t index = seq & MASK;
                if (hmbdc_unlikely(readSeqLastPurge_[i] == seq)) {
                    // no progress since the last purge; kill it only if it is the one
                    // holding the whole buffer back
                    if (i == findSlowestReader()) {
                        res |= (1ul << i);
                    }
                } else if (hmbdc_unlikely(seq == *buffer_.getSeq(index))) {
                    // an item is waiting at this consumer's position; remember where it
                    // was so the next purge can tell whether it is stuck
                    readSeqLastPurge_[i] = seq;
                }
            }
        }
        for (uint16_t i = 0; res && i < PARALLEL_CONSUMER_COUNT; ++i) {
            if (res & (1ul << i)) markDead(i);
        }
        return res;
    }
};

} // namespace memringbuffer_detail
} // namespace pattern
} // namespace hmbdc
#include "hmbdc/pattern/MemRingBuffer2.hpp"

namespace hmbdc {
namespace pattern {

template <uint16_t PARALLEL_CONSUMER_COUNT, typename SeqT = size_t>
using MemRingBuffer = memringbuffer_detail::MemRingBuffer<PARALLEL_CONSUMER_COUNT, SeqT>;

} // namespace pattern
} // namespace hmbdc
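// Usage sketch (illustrative only, not part of the original header): one producer and two
// consumers sharing a buffer. Msg, the slot/ring sizes and the message count are assumptions;
// each consumer slot has its own read sequence, so every consumer sees every message.
//
//     #include "hmbdc/pattern/MemRingBuffer.hpp"
//     #include <thread>
//
//     struct Msg { uint64_t v; };
//
//     int main() {
//         // 2^6 = 64-byte slots (minus the per-slot sequence word), 2^10 = 1024 slots, 2 consumers
//         hmbdc::pattern::MemRingBuffer<2> rb(6u, 10u);
//         std::thread producer([&] {
//             for (uint64_t i = 0; i < 1000; ++i) {
//                 Msg m{i};
//                 rb.put(&m, sizeof(m));          // spins if a consumer falls a full ring behind
//             }
//         });
//         auto consume = [&](uint16_t idx) {
//             for (uint64_t i = 0; i < 1000; ++i) {
//                 Msg m;
//                 rb.take(idx, &m, sizeof(m));    // spins until the next item is published
//             }
//         };
//         std::thread c0(consume, 0), c1(consume, 1);
//         producer.join(); c0.join(); c1.join();
//     }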