hmbdc
simplify-high-performance-messaging-programming
NmSendTransport.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 #define NETMAP_WITH_LIBS
5 #include <net/netmap_user.h>
6 #undef NETMAP_WITH_LIBS
7 
8 #include "hmbdc/tips/rnetmap/Transport.hpp"
9 #include "hmbdc/tips/rnetmap/Messages.hpp"
10 #include "hmbdc/tips/reliable/BackupSendServerT.hpp"
11 #include "hmbdc/app/Base.hpp"
12 #include "hmbdc/comm/eth/Misc.h"
13 #include "hmbdc/comm/inet/Misc.hpp"
14 #include "hmbdc/time/Time.hpp"
15 #include "hmbdc/time/Rater.hpp"
16 #include "hmbdc/pattern/LockFreeBufferT.hpp"
17 
18 
19 #include <boost/bind.hpp>
20 #include <memory>
21 
22 #include <iostream>
23 
24 #include <netinet/ether.h> /* ether_aton */
25 #include <linux/if_packet.h> /* sockaddr_ll */
26 #include <sys/sysctl.h> /* sysctl */
27 #include <ifaddrs.h> /* getifaddrs */
28 
29 #include <poll.h>
30 
31 namespace hmbdc { namespace tips { namespace rnetmap {
32 
33 namespace nmsendtransport_detail {
34 
// Local shorthand for the reliable layer's queue type. This file pushes
// (sequence, attachment pointer, cleanup function) tuples into it from sendPackets().
36 using ToCleanupAttQueue = reliable::ToCleanupAttQueue;
37 
39 : Transport {
// Constructor (remaining parameters; the opening line of the signature is not part
// of this listing).
//
// Validates the message size limits, resolves the ethernet addresses, opens the
// netmap port named by the "netmapPort" config entry, pre-builds the packet header
// template and constructs the SeqAlert message in place inside seqAlertBuf_.
// Failures are reported through HMBDC_THROW as std::out_of_range or
// std::runtime_error.
//
// maxMessageSize    - upper bound checked against mtu_ and
//                     TransportMessageHeader::maxPayloadSize()
// buffer            - stored by reference as buffer_
// rater             - stored by reference as rater_
// toCleanupAttQueue - stored by reference as toCleanupAttQueue_
41  , size_t maxMessageSize
42  , Buffer& buffer
43  , hmbdc::time::Rater& rater
44  , ToCleanupAttQueue& toCleanupAttQueue)
45  : Transport(cfg)
46  , nmd_(nullptr)
47  , virtHeader_(0)
48  , srcEthAddr_{{0}}
49  , dstEthAddr_{{0}}
50  , doChecksum_(config_.getExt<bool>("doChecksum"))
51  , maxMessageSize_(maxMessageSize)
52  , buffer_(buffer)
53  , rater_(rater)
54  , toCleanupAttQueue_(toCleanupAttQueue)
55  , maxSendBatch_(config_.getExt<size_t>("maxSendBatch"))
56  , adPending_(false)
57  , seqAlertPending_(false)
58  , seqAlert_(nullptr)
59  , startSending_(false) {
// a message has to fit both the configured mtu_ and the transport header's limit
60  if (maxMessageSize_ > mtu_) {
61  HMBDC_THROW(std::out_of_range, "mtu needs to >= " << maxMessageSize_);
62  }
63  if (maxMessageSize_ > TransportMessageHeader::maxPayloadSize()) {
64  HMBDC_THROW(std::out_of_range, "maxMessageSize_ needs to <=" << TransportMessageHeader::maxPayloadSize());
65  }
66 
// NOTE(review): ether_aton() returns NULL for a malformed address string; the
// result is not checked here, so a bad "srcEthAddr"/"dstEthAddr" config value
// would make memcpy read from a null pointer.
67  memcpy(&srcEthAddr_, ether_aton(config_.getExt<std::string>("srcEthAddr").c_str())
68  , sizeof(srcEthAddr_));
69  memcpy(&dstEthAddr_, ether_aton(config_.getExt<std::string>("dstEthAddr").c_str())
70  , sizeof(dstEthAddr_));
// for non-"vale" ports this replaces srcEthAddr_ with the local interface's MAC
71  getMacAddresses();
72 
// request template handed to nm_open(); ring/slot counts are read from the config
73  struct nmreq baseNmd;
74  bzero(&baseNmd, sizeof(baseNmd));
75  baseNmd.nr_flags |= NR_ACCEPT_VNET_HDR;
76  config_(baseNmd.nr_tx_slots, "nmTxSlots");
77  config_(baseNmd.nr_rx_slots, "nmRxSlots");
78  config_(baseNmd.nr_tx_rings, "nmTxRings");
79  config_(baseNmd.nr_rx_rings, "nmRxRings");
80 
81  auto nmport = cfg.getExt<std::string>("netmapPort");
82  nmd_ = nm_open(nmport.c_str(), &baseNmd
83  , cfg.getExt<uint64_t>("nmOpenFlags"), NULL);
84  if (!nmd_) {
85  HMBDC_THROW(std::runtime_error, "cannot open " << nmport);
86  }
// NOTE(review): from here on nmd_ is open; every throw below leaves the
// constructor without nm_close(nmd_) being called (the destructor does not run
// for an object whose constructor threw).
// this transport only drives a single tx ring
87  if (nmd_->first_tx_ring != nmd_->last_tx_ring) {
88  HMBDC_THROW(std::out_of_range
89  , "multiple tx rings exist on " << nmport
90  << ". use more specific netmapPort. for example: netmap::p2p1-2 ");
91  }
// ask the port for its virtio-net header length (NETMAP_VNET_HDR_GET)
92  struct nmreq req;
93  memset(&req, 0, sizeof(req));
94  bcopy(nmd_->req.nr_name, req.nr_name, sizeof(req.nr_name));
95  req.nr_version = NETMAP_API;
96  req.nr_cmd = NETMAP_VNET_HDR_GET;
97  int err = ioctl(nmd_->fd, NIOCREGIF, &req);
98  if (err) {
99  HMBDC_THROW(std::runtime_error, "Unable to get virtio-net header length");
100  }
101  virtHeader_ = req.nr_arg1;
// the literal value "tcpIfaceAddr" for "srcIp" means: derive the source IP from
// the "tcpIfaceAddr" config entry via getLocalIpMatchMask()
102  auto srcIpStr = config_.getExt<std::string>("srcIp");
103  if (srcIpStr == "tcpIfaceAddr") {
104  srcIpStr = hmbdc::comm::inet::getLocalIpMatchMask(config_.getExt<std::string>("tcpIfaceAddr"));
105  }
// build the ethernet/ip/udp header template that sendPackets() copies into each slot
106  initializePacket(&precalculatedPacketHead_
107  , config_.getExt<uint16_t>("ttl")
108  , srcIpStr
109  , config_.getExt<std::string>("dstIp")
110  , srcEthAddr_
111  , dstEthAddr_
112  , config_.getExt<uint16_t>("srcPort")
113  , config_.getExt<uint16_t>("dstPort")
114  );
// blocks the constructing thread for "nmResetWaitSec" seconds
115  sleep(config_.getExt<int>("nmResetWaitSec"));
116  //cleanup rings
117  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCTXSYNC, NULL) < 0)) {
118  HMBDC_THROW(std::runtime_error, "IO error");
119  }
// start from the ring's current tail: head and cur are both moved onto it
120  struct netmap_ring * txring = NETMAP_TXRING(nmd_->nifp, nmd_->first_tx_ring);
121  txring->head = txring->cur = txring->tail;
122 
// construct the SeqAlert message (transport header followed by the wrapped
// message) in place inside seqAlertBuf_; its sequence is set to the maximum
// value of HMBDC_SEQ_TYPE, and seqAlert_ keeps a pointer to the wrapped payload
123  auto addr = seqAlertBuf_;
124  auto h = new (addr) TransportMessageHeader;
125  new (addr + sizeof(TransportMessageHeader)) app::MessageWrap<SeqAlert>();
126  h->messagePayloadLen = sizeof(app::MessageWrap<SeqAlert>);
127  h->setSeq(std::numeric_limits<HMBDC_SEQ_TYPE>::max());
128  seqAlert_ = &(h->wrapped<SeqAlert>());
129  }
130 
131  ~NmSendTransport(){
132  nm_close(nmd_);
133  }
134 
135  void startSend(){
136  startSending_ = true;
137  }
138 
139  template <typename AdvertisingMessages>
140  void setAds(AdvertisingMessages const& ads) {
141  decltype(adBufs_) newAdBufs;
142  for (auto const& ad : ads) {
143  newAdBufs.emplace_back();
144  auto addr = newAdBufs.rbegin()->data();
145  auto h = new (addr) TransportMessageHeader;
146  new (addr + sizeof(TransportMessageHeader))
148  h->messagePayloadLen = sizeof(app::MessageWrap<TypeTagBackupSource>);
149  h->setSeq(std::numeric_limits<HMBDC_SEQ_TYPE>::max());
150  }
151  std::swap(adBufs_, newAdBufs);
152  }
153 
154  void setAdPending() {
155  adPending_ = true;
156  }
157 
158  void setSeqAlertPending() {
159  seqAlertPending_ = true;
160  }
161 
162  void runOnce(size_t sessionCount) HMBDC_RESTRICT {
163  struct netmap_ring * txring = NETMAP_TXRING(nmd_->nifp, nmd_->first_tx_ring);
164  if (!nm_ring_empty(txring)) {
165  sendPackets(txring, sessionCount);
166  }
167  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCTXSYNC, NULL) < 0)) {
168  HMBDC_THROW(std::runtime_error, "IO error");
169  }
170  }
171 
172 private:
// netmap descriptor returned by nm_open() in the constructor, closed in the destructor
173  struct nm_desc *nmd_;
174  int virtHeader_; // virtio-net header length obtained via NETMAP_VNET_HDR_GET
// source / destination MAC; the source may be replaced by getMacAddresses()
175  ether_addr srcEthAddr_;
176  ether_addr dstEthAddr_;
// header template built once by initializePacket() and copied into every slot
177  hmbdc::comm::eth::pkt precalculatedPacketHead_;
// config "doChecksum": whether updatePacket() computes the ip/udp checksums
178  bool doChecksum_;
179 
// largest message accepted; validated against mtu_ in the constructor
181  size_t maxMessageSize_;
182 
// source of the messages that sendPackets() peeks and puts on the wire
183  Buffer& buffer_;
// consulted (check/commit) for buffered messages only, not for ads or seq alerts
184  hmbdc::time::Rater& HMBDC_RESTRICT rater_;
// receives (seq, attachment, cleanup function) entries for memory segment trains
185  ToCleanupAttQueue& HMBDC_RESTRICT toCleanupAttQueue_;
// config "maxSendBatch": upper bound of messages packed into one slot
186  size_t maxSendBatch_;
// one fixed size buffer per ad: transport header followed by the wrapped message
187  std::vector<std::array<char, sizeof(TransportMessageHeader)
188  + sizeof(app::MessageWrap<TypeTagBackupSource>)>> adBufs_;
// set by setAdPending(), cleared by sendPackets()
189  bool adPending_;
// storage of the SeqAlert message constructed in place by the constructor
190  char seqAlertBuf_[sizeof(TransportMessageHeader) + sizeof(app::MessageWrap<SeqAlert>)];
// set by setSeqAlertPending(), cleared by sendPackets()
191  bool seqAlertPending_;
// points at the SeqAlert payload inside seqAlertBuf_
192  SeqAlert* seqAlert_;
// set by startSend(); buffered messages are only sent once this is true
193  bool startSending_;
194 
195  void sendPackets(struct netmap_ring * HMBDC_RESTRICT ring, size_t sessionCount) HMBDC_RESTRICT {
196  uint32_t cur = ring->cur;
197  size_t slotRemaining = 0;
198  bool slotNotInited = true;
199  auto batch = maxSendBatch_;
200  hmbdc::comm::eth::pkt* currentPktPtr = nullptr;
201  size_t slotWireSize = 0;
202  struct netmap_slot *slot = &ring->slot[cur];
203  typename Buffer::iterator begin, end;
204  auto limit = maxSendBatch_ * ((ring->tail - ring->cur) % ring->num_slots);
205  buffer_.peek(0, begin, end, limit);
206  if (hmbdc_unlikely(startSending_ && !sessionCount)) {
207  buffer_.wasteAfterPeek(0u, end - begin);
208  begin = end;
209  }
210  auto it = begin;
211  for(;;) {
212  bool processBuffer = false;
213  TransportMessageHeader* th = nullptr;
214  size_t wireSize = 0;
215  if (hmbdc_unlikely(adPending_)) {
216  for (auto& adBuf : adBufs_) {
217  th = reinterpret_cast<TransportMessageHeader*>(adBuf.data());
218  wireSize += sizeof(adBuf);
219  }
220  adPending_ = false;
221  } else if (it != end && startSending_) {
222  th = reinterpret_cast<TransportMessageHeader*>(*it);
223  wireSize = th->wireSize();
224  processBuffer = true;
225  } else if (hmbdc_unlikely(seqAlertPending_)) {
226  if (begin == end) {
227  th = reinterpret_cast<TransportMessageHeader*>(seqAlertBuf_);
228  wireSize = sizeof(seqAlertBuf_);
229  }
230  seqAlertPending_ = false;
231  }
232  //else wireSize == 0
233  bool raterOk = wireSize && (!processBuffer || rater_.check(wireSize));
234  if (!raterOk) break;
235  char *p = NETMAP_BUF(ring, slot->buf_idx);
236  if (slotNotInited) {
237  slotRemaining = std::min((size_t)ring->nr_buf_size, mtu_);
238  auto headWireSize = (uint16_t)(
239  sizeof(ether_header) + sizeof(::ip) + sizeof(udphdr) + virtHeader_
240  );
241  memcpy(p, ((char*)&precalculatedPacketHead_) + sizeof(hmbdc::comm::eth::virt_header)
242  - virtHeader_, headWireSize);
243  currentPktPtr = (hmbdc::comm::eth::pkt*)(p + virtHeader_ - sizeof(hmbdc::comm::eth::virt_header));
244  slotRemaining -= headWireSize;
245  slotWireSize = headWireSize;
246  slotNotInited = false;
247  }
248  if (slotRemaining >= wireSize) {
249  if (hmbdc_likely(processBuffer)) {
250  if (hmbdc_unlikely(th->typeTag() == app::MemorySeg::typeTag)) {
251  auto l = (int)(wireSize - th->wireSizeMemorySeg());
252  memcpy(p + slotWireSize, th->wireBytes(), l);
253  memcpy(p + slotWireSize + l, th->wireBytesMemorySeg(), th->wireSizeMemorySeg());
254  } else {
255  if (hmbdc_unlikely(th->typeTag() == app::StartMemorySegTrain::typeTag)) {
256  auto& trainHead = th->template wrapped<app::StartMemorySegTrain>();
257  auto itActual = it; itActual.seq_ += trainHead.segCount + 1;
258  auto actual = static_cast<TransportMessageHeader*>(*itActual);
259  toCleanupAttQueue_.push_back(std::make_tuple(itActual.seq_
260  , &actual->template wrapped<app::hasMemoryAttachment>()
261  , trainHead.att.afterConsumedCleanupFunc));
262  }
263  memcpy(p + slotWireSize, th, (int)wireSize);
264  }
265 
266  seqAlert_->expectSeq = it.seq_ + 1;
267  ++it;
268  rater_.commit();
269  } else {
270  memcpy(p + slotWireSize, th, (int)wireSize);
271  }
272  slotRemaining -= wireSize;
273  slotWireSize += wireSize;
274  batch--;
275  } else {
276  batch = 0; //this batch is done
277  }
278  if (!batch || it == end) {
279  size_t wireSizeExcludingHead = slotWireSize
280  - (sizeof(ether_header) + sizeof(::ip) + sizeof(udphdr) + virtHeader_);
281  updatePacket(currentPktPtr, wireSizeExcludingHead, doChecksum_);
282  slot->len = slotWireSize;
283  cur = nm_ring_next(ring, cur);
284  slot = &ring->slot[cur];
285  slotWireSize = 0;
286  if (cur == ring->tail || !raterOk) break;
287  //new slot now
288  batch = maxSendBatch_;
289  slotNotInited = true;
290  }
291  if (it == end) break;
292  }
293 
294  if (slotWireSize) {
295  size_t wireSizeExcludingHead = slotWireSize
296  - (sizeof(ether_header) + sizeof(::ip) + sizeof(udphdr) + virtHeader_);
297  updatePacket(currentPktPtr, wireSizeExcludingHead, doChecksum_);
298  slot->len = slotWireSize;
299  cur = nm_ring_next(ring, cur);
300  }
301 
302  ring->head = ring->cur = cur;
303  buffer_.wasteAfterPeek(0u, it - begin);
304  }
305 
306  void getMacAddresses() {
307  auto nmport = config_.getExt<std::string>("netmapPort");
308 
309  if (strncmp(nmport.c_str(), "vale", 4) == 0) return;
310 
311  if (nmport.find_first_of(":") == std::string::npos) {
312  HMBDC_THROW(std::runtime_error
313  , "wrong netmapPort format " << nmport << " (examples: netmap:eth0, netmap:eth0-0)");
314  }
315  auto iface = nmport.substr(nmport.find_first_of(":"));
316  iface = iface.substr(1, iface.find_first_of("-^") - 1);
317 
318 
319  struct ifaddrs *ifaphead, *ifap;
320  int l = sizeof(ifap->ifa_name);
321 
322  if (getifaddrs(&ifaphead) != 0) {
323  HMBDC_THROW(std::runtime_error, "getifaddrs failed for" << iface);
324  }
325  for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) {
326  struct sockaddr_ll *sll =
327  (struct sockaddr_ll *)ifap->ifa_addr;
328  uint8_t *mac;
329 
330  if (!sll || sll->sll_family != AF_PACKET)
331  continue;
332  if (strncmp(ifap->ifa_name, iface.c_str(), l) != 0)
333  continue;
334  mac = (uint8_t *)(sll->sll_addr);
335 
336  char srcEthAddrStr[20];
337  sprintf(srcEthAddrStr, "%02x:%02x:%02x:%02x:%02x:%02x",
338  mac[0], mac[1], mac[2],
339  mac[3], mac[4], mac[5]);
340  memcpy(&srcEthAddr_, ether_aton(srcEthAddrStr), sizeof(srcEthAddr_)); //6 bytes
341  break;
342  }
343  freeifaddrs(ifaphead);
344  if (!ifap) {
345  HMBDC_THROW(std::runtime_error, "no local interface named " << iface);
346  }
347  }
348  static
349  void initializePacket(hmbdc::comm::eth::pkt *pkt, int ttl, std::string srcIpStr, std::string dstIpStr
350  , ether_addr srcEthAddr, ether_addr dstEthAddr, uint16_t srcPort, uint16_t dstPort) {
351  struct ether_header *eh;
352  struct ip *ip;
353  struct udphdr *udp;
354  uint32_t a, b, c, d;
355  sscanf(srcIpStr.c_str(), "%d.%d.%d.%d", &a, &b, &c, &d);
356  auto srcIp = (a << 24u) + (b << 16u) + (c << 8u) + d;
357  sscanf(dstIpStr.c_str(), "%d.%d.%d.%d", &a, &b, &c, &d);
358  auto dstIp = (a << 24u) + (b << 16u) + (c << 8u) + d;
359 
360  /* prepare the headers */
361  eh = &pkt->eh;
362  bcopy(&srcEthAddr, eh->ether_shost, 6);
363  bcopy(&dstEthAddr, eh->ether_dhost, 6);
364 
365  eh->ether_type = htons(ETHERTYPE_IP);
366 
367 #pragma GCC diagnostic push
368 #if defined __clang__ || __GNUC_PREREQ(9,0)
369 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
370 #endif
371  ip = &pkt->ipv4.ip;
372  udp = &pkt->ipv4.udp;
373  ip->ip_v = IPVERSION;
374  ip->ip_hl = sizeof(*ip) >> 2;
375  ip->ip_id = 0;
376  ip->ip_tos = IPTOS_LOWDELAY;
377  ip->ip_len = 0; //zero so chksum can happen in ip_sum
378  ip->ip_id = 0;
379  ip->ip_off = htons(IP_DF); /* Don't fragment */
380  ip->ip_ttl = ttl;
381  ip->ip_p = IPPROTO_UDP;
382  ip->ip_dst.s_addr = htonl(dstIp);
383  ip->ip_src.s_addr = htonl(srcIp);
384  ip->ip_sum = 0;
385  ip->ip_len = sizeof(*ip) + sizeof(udphdr); //ip->ip_len is unknown, put known part
386  udp->source = htons(srcPort);
387  udp->dest = htons(dstPort);
388  udp->len = sizeof(udphdr); //put known part
389  udp->check = 0;
390 
391  bzero(&pkt->vh, sizeof(pkt->vh));
392  }
393 
394  static
395  void updatePacket(hmbdc::comm::eth::pkt *packet, size_t payloadWireSize, bool doChecksum = true) {
396  packet->ipv4.ip.ip_len += payloadWireSize; //already has sizeof(ip) + sizeof(udphdr);
397  packet->ipv4.ip.ip_len = ntohs(packet->ipv4.ip.ip_len);
398  if (doChecksum) {
399  packet->ipv4.ip.ip_sum = hmbdc::comm::eth::wrapsum(
400  hmbdc::comm::eth::checksum(&packet->ipv4.ip, sizeof(packet->ipv4.ip), 0));
401  }
402 
403  packet->ipv4.udp.len += payloadWireSize;
404  packet->ipv4.udp.len = htons(packet->ipv4.udp.len);
405  if (doChecksum) {
406  auto udp = &packet->ipv4.udp;
407  packet->ipv4.udp.check = hmbdc::comm::eth::wrapsum(
408  hmbdc::comm::eth::checksum(udp, sizeof(*udp), /* udp header */
409  hmbdc::comm::eth::checksum(packet->ipv4.body, payloadWireSize, /* udp payload */
410  hmbdc::comm::eth::checksum(&packet->ipv4.ip.ip_src, 2 * sizeof(packet->ipv4.ip.ip_src), /* pseudo header */
411  IPPROTO_UDP + (u_int32_t)ntohs(udp->len)))));
412  }
413  }
414 };
415 
416 #pragma GCC diagnostic pop
417 } //nmsendtransport_detail
419 }}}
T getExt(const path_type &param, bool throwIfMissing=true) const
get a value from the config
Definition: Config.hpp:238
Definition: Messages.hpp:185
class to hold an hmbdc configuration
Definition: Config.hpp:44
Definition: Misc.h:55
Definition: Misc.h:51
Definition: Message.hpp:263
Definition: Rater.hpp:11
Definition: Base.hpp:12
Definition: LockFreeBufferMisc.hpp:89