hmbdc
simplify-high-performance-messaging-programming
NmRecvTransport.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 #include "hmbdc/tips/rnetmap/Transport.hpp"
4 #include "hmbdc/app/Logger.hpp"
5 #include "hmbdc/comm/inet/Misc.hpp"
6 #include "hmbdc/MetaUtils.hpp"
7 
8 #include <boost/bind.hpp>
9 #include <boost/lexical_cast.hpp>
10 
11 #include <memory>
12 #include <type_traits>
13 
14 namespace hmbdc { namespace tips { namespace rnetmap {
15 
16 namespace nmrecvtransport_detail {
/**
 * @brief impl class
 *
 * @tparam OutputBuffer type of buffer to hold resulting network messages
 * @tparam Ep2SessionDict dictionary type used to look up the receiving session by
 * sender endpoint; messages from an endpoint that has no session are dropped
 */
24 template <typename OutputBuffer, typename Ep2SessionDict>
26 : Transport {
27  using SELF = NmRecvTransport;
28 
31  , TypeTagSet const& subscriptions
32  , Ep2SessionDict& sessionDict)
33  : Transport(cfg)
34  , doChecksum_(config_.getExt<bool>("doChecksum"))
35  , busyWait_(true)
36  , pollWaitTimeMillisec_(0)
37  , data_(nullptr)
38  , cmdBuffer_(cmdBuffer)
39  , bufCur_(nullptr)
40  , subscriptions_(subscriptions)
41  , sessionDict_(sessionDict) {
42  auto nmport = config_.getExt<std::string>("netmapPort");
43  busyWait_ = config_.getExt<bool>("busyWait");
44  if (!busyWait_) pollWaitTimeMillisec_ = config_.getExt<int>("pollWaitTimeMillisec");
45 
46  struct nmreq baseNmd;
47  bzero(&baseNmd, sizeof(baseNmd));
48  baseNmd.nr_flags |= NR_ACCEPT_VNET_HDR;
49  config_(baseNmd.nr_tx_slots, "nmTxSlots");
50  config_(baseNmd.nr_rx_slots, "nmRxSlots");
51  config_(baseNmd.nr_tx_rings, "nmTxRings");
52  config_(baseNmd.nr_rx_rings, "nmRxRings");
53 
54  nmd_ = nm_open(nmport.c_str(), &baseNmd, config_.getExt<int>("nmOpenFlags"), NULL);
55  if (!nmd_) {
56  HMBDC_THROW(std::runtime_error, "cannot open " << nmport);
57  }
58 
59  struct nmreq req;
60  memset(&req, 0, sizeof(req));
61  bcopy(nmd_->req.nr_name, req.nr_name, sizeof(req.nr_name));
62  req.nr_version = NETMAP_API;
63  req.nr_cmd = NETMAP_VNET_HDR_GET;
64  int err = ioctl(nmd_->fd, NIOCREGIF, &req);
65  if (err) {
66  HMBDC_THROW(std::runtime_error, "Unable to get virtio-net header length");
67  }
68  virtHeader_ = req.nr_arg1;
69 
70  //setting up poll - might be useful or not
71  pfd_.fd = nmd_->fd;
72  pfd_.events = POLLIN;
73  sleep(config_.getExt<int>("nmResetWaitSec"));
74  //cleanup rings
75  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCRXSYNC, NULL) < 0)) {
76  HMBDC_THROW(std::runtime_error, "IO error");
77  }
78  for (int i = nmd_->first_rx_ring; i <= nmd_->last_rx_ring; i++) {
79  struct netmap_ring * rxring = NETMAP_TXRING(nmd_->nifp, i);
80  if (nm_ring_empty(rxring))
81  continue;
82  rxring->head = rxring->cur = rxring->tail;
83  }
84  }
85 
86  ~NmRecvTransport(){
87  nm_close(nmd_);
88  }
89 
90  void runOnce() HMBDC_RESTRICT {
91  syncNetmap();
92  for (int i = nmd_->first_rx_ring; i <= nmd_->last_rx_ring; i++) {
93  struct netmap_ring * rxring = NETMAP_RXRING(nmd_->nifp, i);
94  if (nm_ring_empty(rxring))
95  continue;
96 
97  recvPackets(rxring);
98  }
99  }
100 
101 private:
102  /**
103  * @brief sync using busy wait or poll depending on config
104  * @details it turns out busy wait performance is very poor when using vale
105  * poll works mostly, but it works well only when an enough timeout is given
106  * less than 10 milli wont work well
107  */
108  void syncNetmap() HMBDC_RESTRICT {
109  if (hmbdc_likely(busyWait_)) {
110  if (hmbdc_unlikely(ioctl(nmd_->fd, NIOCRXSYNC, NULL) < 0)) {
111  HMBDC_THROW(std::runtime_error, "IO error");
112  } else {
113  return;
114  }
115  } else {
116  auto res = poll(&pfd_, 1, pollWaitTimeMillisec_);
117  if (hmbdc_unlikely( res < 0)) {
118  HMBDC_THROW(std::runtime_error, "IO error errno=" << errno);
119  } else {
120  return;
121  }
122  }
123  }
124 
125  void recvPackets(struct netmap_ring * HMBDC_RESTRICT ring) HMBDC_RESTRICT {
126  auto cur = ring->cur;
127 
128  while(cur != ring->tail) {
129  struct netmap_slot *slot = &ring->slot[cur];
130  auto *buf = (uint8_t*)NETMAP_BUF(ring, slot->buf_idx);
131  auto* p = reinterpret_cast<hmbdc::comm::eth::pkt*>(
132  buf + virtHeader_ - sizeof(hmbdc::comm::eth::virt_header));
133 
134  if (p->ipv4.ip.ip_p == IPPROTO_UDP
135  && p->ipv4.ip.ip_off == htons(IP_DF)) {
136  if (!data_) {
137  data_ = (uint8_t*)p->ipv4.body;
138 #pragma GCC diagnostic push
139 #if defined __clang__ || __GNUC_PREREQ(9,0)
140 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
141 #endif
142  auto ip = &p->ipv4.ip;
143  senderEndpoint_ = ip->ip_src.s_addr;
144  }
145  while (data_ + sizeof(TransportMessageHeader) < buf + slot->len) {
146  auto header = reinterpret_cast<TransportMessageHeader*>(data_);
147  // HMBDC_LOG_DEBUG(header->typeTag());
148  // HMBDC_LOG_DEBUG(seq);
149  if (data_ + header->wireSize() <= buf + slot->len) {
150  if (hmbdc_unlikely(header->typeTag() == TypeTagBackupSource::typeTag)) {
151  auto it = cmdBuffer_.claim();
152  char* b = static_cast<char*>(*it);
153  size_t l = header->messagePayloadLen;
154  l = std::min(cmdBuffer_.maxItemSize(), l);
155  memcpy(b, header->payload(), l);
156  auto& bts = reinterpret_cast<app::MessageWrap<TypeTagBackupSource>*>(b)->payload;
157  bts.sendFrom = senderEndpoint_;
158  cmdBuffer_.commit(it);
159  } else {
160  auto session = sessionDict_.find(senderEndpoint_);
161  if (hmbdc_unlikely(session != sessionDict_.end())) {
162  auto a = session->second->accept(header);
163  if (a == 0) {
164  return; //wait for backup
165  }
166  }
167  } //else drop and move to next msg
168  data_ += header->wireSize();
169  } else {
170  break;
171  }
172  }
173  }
174  data_ = nullptr;
175  cur = nm_ring_next(ring, cur);
176  }
177 
178  ring->head = ring->cur = cur;
179  }
180 
181  static
182  bool verifyChecksum(hmbdc::comm::eth::pkt* HMBDC_RESTRICT packet, size_t payloadWireSize) {
183  {
184  auto tmp = packet->ipv4.ip.ip_sum;
185  packet->ipv4.ip.ip_sum = 0;
186  if (tmp != hmbdc::comm::eth::wrapsum(
187  hmbdc::comm::eth::checksum(&packet->ipv4.ip, sizeof(packet->ipv4.ip), 0))) {
188  return false;
189  }
190  packet->ipv4.ip.ip_sum = tmp;
191  }
192 
193  {
194  auto tmp = packet->ipv4.udp.check;
195  packet->ipv4.udp.check = 0;
196 
197  auto udp = &packet->ipv4.udp;
198  if (tmp != hmbdc::comm::eth::wrapsum(
199  hmbdc::comm::eth::checksum(udp, sizeof(*udp), /* udp header */
200  hmbdc::comm::eth::checksum(packet->ipv4.body, payloadWireSize, /* udp payload */
201  hmbdc::comm::eth::checksum(&packet->ipv4.ip.ip_src, 2 * sizeof(packet->ipv4.ip.ip_src), /* pseudo header */
202  IPPROTO_UDP + (u_int32_t)ntohs(udp->len)))))) {
203  return false;
204  }
205 #pragma GCC diagnostic pop
206  packet->ipv4.udp.check = tmp;
207  return true;
208  }
209  }
210 
211  struct nm_desc *nmd_;
212  int virtHeader_; //v hdr len
213  bool doChecksum_;
214  struct pollfd pfd_;
215  bool busyWait_;
216  int pollWaitTimeMillisec_;
217  uint8_t* data_;
218 
220  size_t maxItemSize_;
221  uint32_t senderEndpoint_;
222  char buf_[4*1024];
223  char* bufCur_;
224  TypeTagSet const& subscriptions_;
225  Ep2SessionDict& sessionDict_;
226 };
227 
228 } //nmrecvtransport_detail
229 template <typename OutputBuffer, typename Ep2SessionDict>
230 using NmRecvTransport = nmrecvtransport_detail::NmRecvTransport<OutputBuffer, Ep2SessionDict>;
231 }}}
T getExt(const path_type &param, bool throwIfMissing=true) const
get a value from the config
Definition: Config.hpp:238
Definition: MonoLockFreeBuffer.hpp:16
class to hold an hmbdc configuration
Definition: Config.hpp:44
Definition: Misc.h:55
impl class
Definition: NmRecvTransport.hpp:25
Definition: Misc.h:51
void syncNetmap() HMBDC_RESTRICT
sync using busy wait or poll depending on config
Definition: NmRecvTransport.hpp:108
Definition: TypeTagSet.hpp:138
Definition: Message.hpp:263
Definition: Base.hpp:12