hmbdc
simplify-high-performance-messaging-programming
SendTransportEngine.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 
5 #define NETMAP_WITH_LIBS
6 #include <net/netmap_user.h>
7 #undef NETMAP_WITH_LIBS
8 
9 #include "hmbdc/tips/netmap/Messages.hpp"
10 #include "hmbdc/tips/netmap/DefaultUserConfig.hpp"
11 #include "hmbdc/app/Base.hpp"
12 #include "hmbdc/pattern/MonoLockFreeBuffer.hpp"
13 #include "hmbdc/comm/eth/Misc.h"
14 #include "hmbdc/time/Rater.hpp"
15 #include "hmbdc/numeric/BitMath.hpp"
16 
17 #include <memory>
18 #include <type_traits>
19 #include <mutex>
20 
21 
22 #include <netinet/ether.h> /* ether_aton */
23 #include <linux/if_packet.h> /* sockaddr_ll */
24 #include <sys/sysctl.h> /* sysctl */
25 #include <ifaddrs.h> /* getifaddrs */
26 
27 #include <poll.h>
28 
29 namespace hmbdc { namespace tips { namespace netmap {
30 
31 struct NetContext;
32 struct Sender;
33 
34 namespace sendtransportengine_detail {
35 HMBDC_CLASS_HAS_DECLARE(hmbdc_net_queued_ts);
36 
37 /**
38  * @brief powers the sending functions of a netmap port
39  * @details this needs to be created using NetContext and started in a tips::Context
40  *
41  */
43 : hmbdc::app::Client<SendTransportEngine> {
45  size_t bufferedMessageCount() const {
46  return buffer_.remainingSize();
47  }
48 
49  size_t subscribingPartyDetectedCount(uint16_t tag) const {
50  return 0; //not supported
51  }
52 
/**
 * @brief no-op - this transport does nothing when asked to advertise
 * @details the body is empty, so node, mod and res are all ignored
 */
53  template <MessageTupleC Messages, typename Node>
54  void advertiseFor(Node const& node, uint16_t mod, uint16_t res) {
55  //only applies to connection oriented transport
56  }
57 
58 /**
59  * @brief if the user chooses not to have a Context manage and run the engine,
60  * this method can be called from any thread from time to time to have the engine
61  * do its job
62  * @details thread safe and non-blocking; if the engine is already being powered by a Context,
63  * this method has no effect
64  */
65  void rotate() {
67  }
68 
69  void stop();
70 
72  bool droppedCb() override;
73 
74  /*virtual*/
75  void invokedCb(size_t) HMBDC_RESTRICT override {
76  for (int i = nmd_->first_tx_ring; i <= nmd_->last_tx_ring; i++) {
77  struct netmap_ring * txring = NETMAP_TXRING(nmd_->nifp, i);
78  if (nm_ring_empty(txring))
79  continue;
80 
81  sendPackets(txring);
82  }
83  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCTXSYNC, NULL) < 0)) {
84  HMBDC_THROW(std::runtime_error, "IO error");
85  }
86  }
87 
88  void stoppedCb(std::exception const& e) override;
89 
90  char const* hmbdcName() const {
91  return this->hmbdcName_.c_str();
92  }
93 
94  std::tuple<char const*, int> schedSpec() const {
95  return std::make_tuple(this->schedPolicy_.c_str(), this->schedPriority_);
96  }
97 
98  template <typename Message>
99  void queue(Message&& msg) HMBDC_RESTRICT {
100  auto n = 1;
101  auto it = buffer_.claim(n);
102  queue(it, std::forward<Message>(msg));
103  buffer_.commit(it, n);
104  }
105 
106  template <typename Message>
107  bool tryQueue(Message&& msg) HMBDC_RESTRICT {
108  auto n = 1;
109  auto it = buffer_.tryClaim(n);
110  if (it) {
111  queue(it, std::forward<Message>(msg));
112  buffer_.commit(it, n);
113  return true;
114  }
115  return false;
116  }
117 
118  template <typename M, typename... Messages>
119  void queue(pattern::MonoLockFreeBuffer::iterator it, M&& msg, Messages&&... msgs) {
120  using Message = typename std::decay<M>::type;
121  static_assert(std::is_trivially_destructible<Message>::value, "cannot send message with dtor");
122  if (hmbdc_unlikely(sizeof(Message) > maxMessageSize_)) {
123  HMBDC_THROW(std::out_of_range, "maxMessageSize too small to hold a message when constructing SendTransportEngine");
124  }
125  auto s = *it;
126  auto tmh = TransportMessageHeader::copyTo(s, std::forward<M>(msg));
127 
128  if constexpr (has_hmbdc_net_queued_ts<Message>::value) {
129  tmh->template wrapped<Message>().hmbdc_net_queued_ts = hmbdc::time::SysTime::now();
130  } else (void)tmh;
131  queue(++it, std::forward<Messages>(msgs)...);
132  }
133 
134  template <typename Message, typename ... Args>
135  void queueInPlace(Args&&... args) HMBDC_RESTRICT {
136  static_assert(std::is_trivially_destructible<Message>::value, "cannot send message with dtor");
137  if (hmbdc_unlikely(sizeof(Message) > maxMessageSize_)) {
138  HMBDC_THROW(std::out_of_range, "maxMessageSize too small to hold a message when constructing SendTransportEngine");
139  }
140  auto s = buffer_.claim();
141  TransportMessageHeader::copyToInPlace<Message>(*s, std::forward<Args>(args)...);
142  buffer_.commit(s);
143  }
144 
145  // void queueBytes(uint16_t tag, void const* bytes, size_t len);
// Recursion terminator: the variadic queue(it, msg, msgs...) overload calls
// queue(++it, msgs...) and lands here once the message pack is empty.
146  void queue(pattern::MonoLockFreeBuffer::iterator it) {}
147 
148 private:
// buffer size (as a power of 2) from config, or derived from maxMessageSize_ when the config value is 0
149  uint16_t outBufferSizePower2();
// packs queued messages into the slots of one tx ring
150  void sendPackets(struct netmap_ring *);
151 
// fills srcEthAddr_ from the local interface named in "netmapPort" (skipped for vale ports)
152  void getMacAddresses();
// fills the Ethernet/IP/UDP header template used for every outgoing packet
153  static
154  void initializePacket(hmbdc::comm::eth::pkt *, int, std::string, std::string, ether_addr, ether_addr, uint16_t, uint16_t);
155 
// finalizes lengths (and optionally checksums) of a packet once its payload size is known
156  static
157  void updatePacket(hmbdc::comm::eth::pkt *, size_t, bool = true);
158 
// user config with DefaultUserConfig as fallback, reset to its "tx" section
159  hmbdc::app::Config const config_;
// loaded from the "hmbdcName" / "schedPolicy" / "schedPriority" config entries
160  std::string hmbdcName_;
161  std::string schedPolicy_;
162  int schedPriority_;
// largest sizeof(Message) accepted by queue() / queueInPlace()
163  size_t maxMessageSize_;
164 
165 
// netmap port descriptor returned by nm_open(); closed in the destructor
166  struct nm_desc *nmd_;
// outgoing messages waiting to be packed into netmap slots
167  pattern::MonoLockFreeBuffer buffer_;
168  int virtHeader_; //v hdr len
169 
170 
// source / destination MAC addresses written into the packet header template
171  ether_addr srcEthAddr_;
172  ether_addr dstEthAddr_;
// header template copied to the front of every slot in sendPackets()
173  hmbdc::comm::eth::pkt precalculatedPacketHead_;
// "doChecksum" config entry - passed to updatePacket()
174  bool doChecksum_;
// consulted (check / commit) with each message's wire size before it is sent
175  hmbdc::time::Rater rater_;
// max number of messages packed into one slot
176  size_t maxSendBatch_;
// "mtu" config entry minus 28 bytes (8 UDP + 20 IP header)
177  uint16_t mtu_;
178 };
179 
180 } //sendtransportengine_detail
181 
182 using SendTransportEngine = sendtransportengine_detail::SendTransportEngine;
183 
184 }}}
185 
186 namespace hmbdc { namespace tips { namespace netmap {
187 
188 namespace sendtransportengine_detail {
189 
190 inline
191 uint16_t
192 SendTransportEngine::
193 outBufferSizePower2() {
194  auto res = config_.getExt<uint16_t>("outBufferSizePower2");
195  if (res) {
196  return res;
197  }
198  res = hmbdc::numeric::log2Upper(16ul * 1024ul / (8ul + maxMessageSize_));
199  HMBDC_LOG_N("auto set --outBufferSizePower2=", res);
200  return res;
201 }
202 
203 inline
204 SendTransportEngine::
205 SendTransportEngine(hmbdc::app::Config cfg, size_t maxMessageSize)
206 : config_((cfg.setAdditionalFallbackConfig(hmbdc::app::Config(DefaultUserConfig))
207  , cfg.resetSection("tx", false)))
208 , maxMessageSize_(maxMessageSize)
209 , nmd_(nullptr)
210 , buffer_(maxMessageSize + sizeof(TransportMessageHeader) + sizeof(hmbdc::app::MessageHead)
211  , outBufferSizePower2())
212 , virtHeader_(0)
213 , srcEthAddr_{{0}}
214 , dstEthAddr_{{0}}
215 , doChecksum_(config_.getExt<bool>("doChecksum"))
216 , rater_(hmbdc::time::Duration::seconds(1u)
217  , config_.getExt<size_t>("sendBytesPerSec")
218  , config_.getExt<size_t>("sendBytesBurst")
219  , config_.getExt<size_t>("sendBytesBurst") != 0ul //no rate control by default
220 )
221 , maxSendBatch_(config_.getExt<size_t>("maxSendBatch"))
222 , mtu_(config_.getExt<size_t>("mtu")) {
223  mtu_ -= (8u + 20u); // 8bytes udp header and 20bytes ip header
224  config_(hmbdcName_, "hmbdcName")
225  (schedPolicy_, "schedPolicy")
226  (schedPriority_, "schedPriority")
227  ;
228 
229  memcpy(&srcEthAddr_, ether_aton(config_.getExt<std::string>("srcEthAddr").c_str())
230  , sizeof(srcEthAddr_));
231  memcpy(&dstEthAddr_, ether_aton(config_.getExt<std::string>("dstEthAddr").c_str())
232  , sizeof(dstEthAddr_));
233  getMacAddresses();
234 
235  struct nmreq baseNmd;
236  bzero(&baseNmd, sizeof(baseNmd));
237  baseNmd.nr_flags |= NR_ACCEPT_VNET_HDR;
238  config_(baseNmd.nr_tx_slots, "nmTxSlots");
239  config_(baseNmd.nr_rx_slots, "nmRxSlots");
240  config_(baseNmd.nr_tx_rings, "nmTxRings");
241  config_(baseNmd.nr_rx_rings, "nmRxRings");
242 
243  auto nmport = config_.getExt<std::string>("netmapPort");
244  uint32_t flags = config_.getExt<uint32_t>("nmOpenFlags");
245  nmd_ = nm_open(nmport.c_str(), &baseNmd, flags, NULL);
246  if (!nmd_) {
247  HMBDC_THROW(std::runtime_error, "cannot open " << nmport);
248  }
249 
250  if (nmd_->first_tx_ring != nmd_->last_tx_ring) {
251  HMBDC_LOG_W("multiple tx rings exist on ", nmport, " the recv side could receive out of order messages."
252  " to avoid it, use more specific netmapPort with the ring number. for example: netmap::p2p1-2");
253  }
254 
255  struct nmreq req;
256  memset(&req, 0, sizeof(req));
257  bcopy(nmd_->req.nr_name, req.nr_name, sizeof(req.nr_name));
258  req.nr_version = NETMAP_API;
259  req.nr_cmd = NETMAP_VNET_HDR_GET;
260  int err = ioctl(nmd_->fd, NIOCREGIF, &req);
261  if (err) {
262  HMBDC_THROW(std::runtime_error, "Unable to get virtio-net header length");
263  }
264  virtHeader_ = req.nr_arg1;
265 
266  initializePacket(&precalculatedPacketHead_
267  , config_.getExt<uint16_t>("ttl")
268  , config_.getExt<std::string>("srcIp")
269  , config_.getExt<std::string>("dstIp")
270  , srcEthAddr_
271  , dstEthAddr_
272  , config_.getExt<uint16_t>("srcPort")
273  , config_.getExt<uint16_t>("dstPort")
274  );
275  sleep(config_.getExt<int>("nmResetWaitSec"));
276  //cleanup rings
277  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCTXSYNC, NULL) < 0)) {
278  HMBDC_THROW(std::runtime_error, "IO error");
279  }
280  for (int i = nmd_->first_tx_ring; i <= nmd_->last_tx_ring; i++) {
281  struct netmap_ring * txring = NETMAP_TXRING(nmd_->nifp, i);
282  txring->head = txring->cur = txring->tail;
283  }
284 }
285 
286 inline
287 void
288 SendTransportEngine::
289 stop() {
290  buffer_.reset();
291 };
292 
293 inline
294 SendTransportEngine::
295 ~SendTransportEngine() {
296  nm_close(nmd_);
297 }
298 
// droppedCb: resets the outgoing buffer and returns true.
// NOTE(review): the qualifier/name lines of this definition are missing from this
// listing; the in-class declaration is `bool droppedCb() override`.
299 /*virtual*/
300 inline
301 bool
304  buffer_.reset();
305  return true;
306 }
307 
// stoppedCb: logs the message of the exception it is handed via HMBDC_LOG_C;
// nothing else is done here.
308 inline
309 void
311 stoppedCb(std::exception const& e) {
312  HMBDC_LOG_C(e.what());
313 };
314 
/**
 * @brief pack queued messages from buffer_ into the slots of one netmap tx ring
 * @details every slot starts with a copy of the precalculated packet header and is then
 * filled with up to maxSendBatch_ messages, as long as they fit within
 * min(mtu_, ring->nr_buf_size) and rater_ allows them. Finished slots get their
 * lengths (and optionally checksums) fixed by updatePacket(). Finally the ring's
 * head/cur are advanced and the consumed messages are released from buffer_.
 * @param ring the tx ring to fill
 */
315 inline
316 void
317 SendTransportEngine::
318 sendPackets(struct netmap_ring * HMBDC_RESTRICT ring) HMBDC_RESTRICT {
319  uint32_t cur = ring->cur;
// cur has reached tail: no slot to fill in this ring
320  if (hmbdc_unlikely(cur == ring->tail)) return;
// most messages that could be sent: maxSendBatch_ per slot between cur and tail.
// NOTE(review): the unsigned (tail - cur) % num_slots only equals the real slot distance
// when num_slots is a power of 2 - confirm for the rings in use
322  auto limit = maxSendBatch_ * ((ring->tail - cur) % ring->num_slots);
// begin / end are presumably declared on a line missing from this listing
323  if (hmbdc_unlikely(!(buffer_.peek(begin, end, limit)))) {
324  return;
325  }
326  bool slotInited = false;
327  auto it = begin;
// batch counts how many more messages may still go into the current slot
328  auto batch = maxSendBatch_;
329  struct netmap_slot *slot = &ring->slot[cur];
330  uint32_t slotLen = 0;
331  char *p = NETMAP_BUF(ring, slot->buf_idx);
332  hmbdc::comm::eth::pkt* currentPktPtr = (hmbdc::comm::eth::pkt*)(p + virtHeader_
334  uint16_t slotLenMax = std::min(mtu_, (uint16_t)ring->nr_buf_size);
335  for (; it != end;) {
336  auto th = reinterpret_cast<TransportMessageHeader*>(*it);
// stop sending altogether (else branch below) once the rater refuses this message
337  if (hmbdc_likely(rater_.check(th->wireSize()))) {
338  if (!slotInited) {
// a fresh slot starts with the last virtHeader_ bytes of the virt header
// followed by the Ethernet + IP + UDP headers of the template
339  auto wireSize = (uint16_t)(
340  sizeof(ether_header) + sizeof(ip) + sizeof(udphdr) + virtHeader_
341  );
342  memcpy(p, ((char*)&precalculatedPacketHead_) + sizeof(hmbdc::comm::eth::virt_header)
343  - virtHeader_, wireSize);
344  slotLen = wireSize;
345  slotInited = true;
346  }
347  auto wireSize = th->wireSize();
348  if (slotLen + wireSize <= slotLenMax) {
// message fits: append it and move on to the next one
349  memcpy(p + slotLen, th, (int)wireSize);
350  slotLen += wireSize;
351  rater_.commit();
352  batch--;
353  ++it;
354  } else {
// message does not fit: close this slot, the same message is retried in the next slot
355  batch = 0; //this batch is done
356  }
357  if (!batch) {
// slot is complete: fix up lengths/checksums and advance to the next slot
358  slot->len = slotLen;
359  size_t wireSizeExcludingHead = slotLen
360  - (sizeof(ether_header) + sizeof(ip) + sizeof(udphdr) + virtHeader_);
361  updatePacket(currentPktPtr, wireSizeExcludingHead, doChecksum_);
362  cur = nm_ring_next(ring, cur);
// slotLen == 0 also keeps the flush after the loop from running for this slot again
363  slotLen = 0;
364  if (cur == ring->tail) break;
365  slot = &ring->slot[cur];
366  p = NETMAP_BUF(ring, slot->buf_idx);
367  currentPktPtr = (hmbdc::comm::eth::pkt*)(p + virtHeader_
369  batch = maxSendBatch_;
370  slotInited = false;
371  }
372  } else {
373  break;
374  }
375  }
// flush a slot that was started but not closed inside the loop
376  if (slotLen) {
377  slot->len = slotLen;
378  size_t wireSizeExcludingHead = slotLen
379  - (sizeof(ether_header) + sizeof(ip) + sizeof(udphdr) + virtHeader_);
380  updatePacket(currentPktPtr, wireSizeExcludingHead, doChecksum_);
381  cur = nm_ring_next(ring, cur);
382  }
383 
// hand the filled slots to netmap and release the it - begin consumed messages from buffer_
384  ring->head = ring->cur = cur;
385  buffer_.wasteAfterPeek(begin, it - begin, true);
386 }
387 
388 inline
389 void
390 SendTransportEngine::
391 getMacAddresses() {
392  auto nmport = config_.getExt<std::string>("netmapPort");
393 
394  if (strncmp(nmport.c_str(), "vale", 4) == 0) return;
395 
396  if (nmport.find_first_of(":") == std::string::npos) {
397  HMBDC_THROW(std::runtime_error
398  , "wrong netmapPort format (examples: netmap:eth0, netmap:eth0-0)");
399  }
400  auto iface = nmport.substr(nmport.find_first_of(":"));
401  iface = iface.substr(1, iface.find_first_of("-^") - 1);
402 
403 
404  struct ifaddrs *ifaphead, *ifap;
405  int l = sizeof(ifap->ifa_name);
406 
407  if (getifaddrs(&ifaphead) != 0) {
408  HMBDC_THROW(std::runtime_error, "getifaddrs failed for" << iface);
409  }
410  for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
411  struct sockaddr_ll *sll =
412  (struct sockaddr_ll *)ifap->ifa_addr;
413  uint8_t *mac;
414 
415  if (!sll || sll->sll_family != AF_PACKET)
416  continue;
417  if (strncmp(ifap->ifa_name, iface.c_str(), l) != 0)
418  continue;
419  mac = (uint8_t *)(sll->sll_addr);
420 
421  char srcEthAddrStr[20];
422  sprintf(srcEthAddrStr, "%02x:%02x:%02x:%02x:%02x:%02x",
423  mac[0], mac[1], mac[2],
424  mac[3], mac[4], mac[5]);
425  memcpy(&srcEthAddr_, ether_aton(srcEthAddrStr), sizeof(srcEthAddr_)); //6 bytes
426  break;
427  }
428  freeifaddrs(ifaphead);
429  if (!ifap) {
430  HMBDC_THROW(std::runtime_error, "no local interface named " << iface);
431  }
432 }
433 
434 inline
435 void
436 SendTransportEngine::
437 initializePacket(hmbdc::comm::eth::pkt *pkt, int ttl, std::string srcIpStr, std::string dstIpStr
438  , ether_addr srcEthAddr, ether_addr dstEthAddr, uint16_t srcPort, uint16_t dstPort) {
439  struct ether_header *eh;
440  struct ip *ip;
441  struct udphdr *udp;
442  uint32_t a, b, c, d;
443  sscanf(srcIpStr.c_str(), "%d.%d.%d.%d", &a, &b, &c, &d);
444  auto srcIp = (a << 24u) + (b << 16u) + (c << 8u) + d;
445  sscanf(dstIpStr.c_str(), "%d.%d.%d.%d", &a, &b, &c, &d);
446  auto dstIp = (a << 24u) + (b << 16u) + (c << 8u) + d;
447 
448  /* prepare the headers */
449  eh = &pkt->eh;
450  bcopy(&srcEthAddr, eh->ether_shost, 6);
451  bcopy(&dstEthAddr, eh->ether_dhost, 6);
452 
453  eh->ether_type = htons(ETHERTYPE_IP);
454 
455 #pragma GCC diagnostic push
456 #if defined __clang__ || __GNUC_PREREQ(9,0)
457 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
458 #endif
459  ip = &pkt->ipv4.ip;
460  udp = &pkt->ipv4.udp;
461  ip->ip_v = IPVERSION;
462  ip->ip_hl = sizeof(*ip) >> 2;
463  ip->ip_id = 0;
464  ip->ip_tos = IPTOS_LOWDELAY;
465  ip->ip_len = 0; //zero so chksum can happen in ip_sum
466  ip->ip_id = 0;
467  ip->ip_off = htons(IP_DF); /* Don't fragment */
468  ip->ip_ttl = ttl;
469  ip->ip_p = IPPROTO_UDP;
470  ip->ip_dst.s_addr = htonl(dstIp);
471  ip->ip_src.s_addr = htonl(srcIp);
472  ip->ip_sum = 0;
473  ip->ip_len = sizeof(*ip) + sizeof(udphdr); //ip->ip_len is unknown, put known part
474  udp->source = htons(srcPort);
475  udp->dest = htons(dstPort);
476  udp->len = sizeof(udphdr); //put known part
477  udp->check = 0;
478 
479  bzero(&pkt->vh, sizeof(pkt->vh));
480 }
481 
482 inline
483 void
484 SendTransportEngine::
485 updatePacket(hmbdc::comm::eth::pkt *packet, size_t payloadWireSize, bool doChecksum) {
486  packet->ipv4.ip.ip_len += payloadWireSize; //already has sizeof(ip) + sizeof(udphdr);
487  packet->ipv4.ip.ip_len = ntohs(packet->ipv4.ip.ip_len);
488  if (doChecksum) {
489  packet->ipv4.ip.ip_sum = hmbdc::comm::eth::wrapsum(hmbdc::comm::eth::checksum(
490  &packet->ipv4.ip, sizeof(packet->ipv4.ip), 0));
491  }
492 
493  packet->ipv4.udp.len += payloadWireSize;
494  packet->ipv4.udp.len = htons(packet->ipv4.udp.len);
495  if (doChecksum) {
496  auto udp = &packet->ipv4.udp;
497  packet->ipv4.udp.check = hmbdc::comm::eth::wrapsum(
498  hmbdc::comm::eth::checksum(udp, sizeof(*udp), /* udp header */
499  hmbdc::comm::eth::checksum(packet->ipv4.body, payloadWireSize, /* udp payload */
500  hmbdc::comm::eth::checksum(&packet->ipv4.ip.ip_src, 2 * sizeof(packet->ipv4.ip.ip_src), /* pseudo header */
501  IPPROTO_UDP + (u_int32_t)ntohs(udp->len)))));
502  }
503 }
504 
505 #pragma GCC diagnostic pop
506 
507 } //sendtransportengine_detail
508 
509 }}}
T getExt(const path_type &param, bool throwIfMissing=true) const
get a value from the config
Definition: Config.hpp:238
void invokedCb(size_t) HMBDC_RESTRICT override
this callback is called all the time (frequently) - the exact timing is after a batch of messages are...
Definition: SendTransportEngine.hpp:75
power a netmap port sending functions
Definition: SendTransportEngine.hpp:42
void stoppedCb(std::exception const &e) override
callback called when this Client is taken out of message dispatching
Definition: SendTransportEngine.hpp:311
void rotate()
if the user chooses not to have a Context manage and run the engine, this method can be called from a...
Definition: SendTransportEngine.hpp:65
class to hold an hmbdc configuration
Definition: Config.hpp:44
Definition: Misc.h:55
Definition: Misc.h:51
bool droppedCb() override
callback called after the Client is safely taken out of the Context
Definition: SendTransportEngine.hpp:303
a Node is a thread of execution that can subscribe and receive Messages
Definition: Node.hpp:51
A Client represents a thread of execution/a task. The execution is managed by a Context. A Client object could participate in message dispatching as the receiver of specified message types.
Definition: Client.hpp:128
Definition: Rater.hpp:11
Definition: Base.hpp:12
Definition: LockFreeBufferMisc.hpp:89