2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
/*
 * Descriptor-count constants.  The macro suffix names the case each value
 * applies to (DEFAULT, VIRTIO, 10GE, 40GE); RX and TX are listed separately.
 */

/* Receive-side descriptor counts. */
#define DPDK_NB_RX_DESC_DEFAULT  512
#define DPDK_NB_RX_DESC_VIRTIO   256
#define DPDK_NB_RX_DESC_10GE     2048
#define DPDK_NB_RX_DESC_40GE     (4096 - 128)

/* Transmit-side descriptor counts. */
#define DPDK_NB_TX_DESC_DEFAULT  512
#define DPDK_NB_TX_DESC_VIRTIO   256
#define DPDK_NB_TX_DESC_10GE     2048
#define DPDK_NB_TX_DESC_40GE     2048
25 #if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
/*
 * Device counter table for builds where RTE_VERSION >= 2.2.0.
 *
 * Each row is _(label, member).  The site that expands this list supplies
 * its own two-argument definition of _().  Some of the member names
 * (ierrors, imissed, rx_nombuf) are the ones read from xd->stats in
 * dpdk_update_counters.
 */
#define foreach_dpdk_counter                       \
  _(tx_frames_ok,            opackets)             \
  _(tx_bytes_ok,             obytes)               \
  _(tx_errors,               oerrors)              \
  _(tx_loopback_frames_ok,   olbpackets)           \
  _(tx_loopback_bytes_ok,    olbbytes)             \
  _(rx_frames_ok,            ipackets)             \
  _(rx_bytes_ok,             ibytes)               \
  _(rx_errors,               ierrors)              \
  _(rx_missed,               imissed)              \
  _(rx_multicast_frames_ok,  imcasts)              \
  _(rx_no_bufs,              rx_nombuf)            \
  _(rx_loopback_frames_ok,   ilbpackets)           \
  _(rx_loopback_bytes_ok,    ilbbytes)
/*
 * Extended device counter table.  Compared with the RTE_VERSION >= 2.2.0
 * list it additionally carries the bad-CRC / bad-length, flow-director
 * match / miss and pause-frame (XON / XOFF) rows.
 * NOTE(review): presumably this is the alternative selected for older DPDK
 * releases; the conditional that separates the two lists is not visible here.
 *
 * Each row is _(label, member); the expanding site defines _().
 */
#define foreach_dpdk_counter                       \
  _(tx_frames_ok,            opackets)             \
  _(tx_bytes_ok,             obytes)               \
  _(tx_errors,               oerrors)              \
  _(tx_loopback_frames_ok,   olbpackets)           \
  _(tx_loopback_bytes_ok,    olbbytes)             \
  _(rx_frames_ok,            ipackets)             \
  _(rx_bytes_ok,             ibytes)               \
  _(rx_errors,               ierrors)              \
  _(rx_missed,               imissed)              \
  _(rx_bad_crc,              ibadcrc)              \
  _(rx_bad_length,           ibadlen)              \
  _(rx_multicast_frames_ok,  imcasts)              \
  _(rx_no_bufs,              rx_nombuf)            \
  _(rx_filter_match,         fdirmatch)            \
  _(rx_filter_miss,          fdirmiss)             \
  _(tx_pause_xon,            tx_pause_xon)         \
  _(rx_pause_xon,            rx_pause_xon)         \
  _(tx_pause_xoff,           tx_pause_xoff)        \
  _(rx_pause_xoff,           rx_pause_xoff)        \
  _(rx_loopback_frames_ok,   ilbpackets)           \
  _(rx_loopback_bytes_ok,    ilbbytes)
/*
 * Per-queue counter table: rows are _(label, member), with every member
 * name carrying a q_ prefix.  The expanding site defines _().
 */
#define foreach_dpdk_q_counter        \
  _(rx_frames_ok,  q_ipackets)        \
  _(tx_frames_ok,  q_opackets)        \
  _(rx_bytes_ok,   q_ibytes)          \
  _(tx_bytes_ok,   q_obytes)          \
  _(rx_errors,     q_errors)
/*
 * RSS hash-function table: rows are _(ETH_RSS_* constant, "short name").
 * The expanding site defines _() to consume the pair.
 */
#define foreach_dpdk_rss_hf                          \
  _(ETH_RSS_IPV4,               "ipv4")              \
  _(ETH_RSS_FRAG_IPV4,          "ipv4-frag")         \
  _(ETH_RSS_NONFRAG_IPV4_TCP,   "ipv4-tcp")          \
  _(ETH_RSS_NONFRAG_IPV4_UDP,   "ipv4-udp")          \
  _(ETH_RSS_NONFRAG_IPV4_SCTP,  "ipv4-sctp")         \
  _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other")        \
  _(ETH_RSS_IPV6,               "ipv6")              \
  _(ETH_RSS_FRAG_IPV6,          "ipv6-frag")         \
  _(ETH_RSS_NONFRAG_IPV6_TCP,   "ipv6-tcp")          \
  _(ETH_RSS_NONFRAG_IPV6_UDP,   "ipv6-udp")          \
  _(ETH_RSS_NONFRAG_IPV6_SCTP,  "ipv6-sctp")         \
  _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other")        \
  _(ETH_RSS_L2_PAYLOAD,         "l2-payload")        \
  _(ETH_RSS_IPV6_EX,            "ipv6-ex")           \
  _(ETH_RSS_IPV6_TCP_EX,        "ipv6-tcp-ex")       \
  _(ETH_RSS_IPV6_UDP_EX,        "ipv6-udp-ex")
/*
 * RX offload capability table: rows are _(DEV_RX_OFFLOAD_* constant,
 * "short name").  The expanding site defines _() to consume the pair.
 *
 * The TCP LRO row is labelled "tcp-lro" (it was previously misspelled
 * "rcp-lro"), matching the "tcp-" prefix used by the other TCP rows.
 */
#define foreach_dpdk_rx_offload_caps                 \
  _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip")         \
  _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum")         \
  _(DEV_RX_OFFLOAD_UDP_CKSUM,  "udp-cksum")          \
  _(DEV_RX_OFFLOAD_TCP_CKSUM,  "tcp-cksum")          \
  _(DEV_RX_OFFLOAD_TCP_LRO,    "tcp-lro")            \
  _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip")
/*
 * TX offload capability table: rows are _(DEV_TX_OFFLOAD_* constant,
 * "short name").  The expanding site defines _() to consume the pair.
 */
#define foreach_dpdk_tx_offload_caps                          \
  _(DEV_TX_OFFLOAD_VLAN_INSERT,      "vlan-insert")           \
  _(DEV_TX_OFFLOAD_IPV4_CKSUM,       "ipv4-cksum")            \
  _(DEV_TX_OFFLOAD_UDP_CKSUM,        "udp-cksum")             \
  _(DEV_TX_OFFLOAD_TCP_CKSUM,        "tcp-cksum")             \
  _(DEV_TX_OFFLOAD_SCTP_CKSUM,       "sctp-cksum")            \
  _(DEV_TX_OFFLOAD_TCP_TSO,          "tcp-tso")               \
  _(DEV_TX_OFFLOAD_UDP_TSO,          "udp-tso")               \
  _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum")      \
  _(DEV_TX_OFFLOAD_QINQ_INSERT,      "qinq-insert")
109 #if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
/*
 * RX mbuf offload-flag table for builds where RTE_VERSION >= 2.1.0.
 * Rows are _(PKT_RX_* flag, "description"); the expanding site defines _().
 */
#define foreach_dpdk_pkt_rx_offload_flag                                  \
  _(PKT_RX_VLAN_PKT,      "RX packet is a 802.1q VLAN packet")            \
  _(PKT_RX_RSS_HASH,      "RX packet with RSS hash result")               \
  _(PKT_RX_FDIR,          "RX packet with FDIR infos")                    \
  _(PKT_RX_L4_CKSUM_BAD,  "L4 cksum of RX pkt. is not OK")                \
  _(PKT_RX_IP_CKSUM_BAD,  "IP cksum of RX pkt. is not OK")                \
  _(PKT_RX_IEEE1588_PTP,  "RX IEEE1588 L2 Ethernet PT Packet")            \
  _(PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")
/*
 * Packet-type table.  Rows are _(LAYER, KIND, "description").
 * format_dpdk_pkt_types pastes the first two columns into
 * RTE_PTYPE_<LAYER>_MASK and RTE_PTYPE_<LAYER>_<KIND> and prints the
 * description for each row whose masked value matches.
 *
 * The INNER_L4 / FRAG description reads "Inner fragmented IP packet"
 * (it was previously misspelled "fagmented").
 */
#define foreach_dpdk_pkt_type                                                          \
  _(L2, ETHER,                  "Ethernet packet")                                     \
  _(L2, ETHER_TIMESYNC,         "Ethernet packet for time sync")                       \
  _(L2, ETHER_ARP,              "ARP packet")                                          \
  _(L2, ETHER_LLDP,             "LLDP (Link Layer Discovery Protocol) packet")         \
  _(L3, IPV4,                   "IPv4 packet without extension headers")               \
  _(L3, IPV4_EXT,               "IPv4 packet with extension headers")                  \
  _(L3, IPV4_EXT_UNKNOWN,       "IPv4 packet with or without extension headers")       \
  _(L3, IPV6,                   "IPv6 packet without extension headers")               \
  _(L3, IPV6_EXT,               "IPv6 packet with extension headers")                  \
  _(L3, IPV6_EXT_UNKNOWN,       "IPv6 packet with or without extension headers")       \
  _(L4, TCP,                    "TCP packet")                                          \
  _(L4, UDP,                    "UDP packet")                                          \
  _(L4, FRAG,                   "Fragmented IP packet")                                \
  _(L4, SCTP,                   "SCTP (Stream Control Transmission Protocol) packet")  \
  _(L4, ICMP,                   "ICMP packet")                                         \
  _(L4, NONFRAG,                "Non-fragmented IP packet")                            \
  _(TUNNEL, GRE,                "GRE tunneling packet")                                \
  _(TUNNEL, VXLAN,              "VXLAN tunneling packet")                              \
  _(TUNNEL, NVGRE,              "NVGRE Tunneling packet")                              \
  _(TUNNEL, GENEVE,             "GENEVE Tunneling packet")                             \
  _(TUNNEL, GRENAT,             "Teredo, VXLAN or GRE Tunneling packet")               \
  _(INNER_L2, ETHER,            "Inner Ethernet packet")                               \
  _(INNER_L2, ETHER_VLAN,       "Inner Ethernet packet with VLAN")                     \
  _(INNER_L3, IPV4,             "Inner IPv4 packet without extension headers")         \
  _(INNER_L3, IPV4_EXT,         "Inner IPv4 packet with extension headers")            \
  _(INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \
  _(INNER_L3, IPV6,             "Inner IPv6 packet without extension headers")         \
  _(INNER_L3, IPV6_EXT,         "Inner IPv6 packet with extension headers")            \
  _(INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \
  _(INNER_L4, TCP,              "Inner TCP packet")                                    \
  _(INNER_L4, UDP,              "Inner UDP packet")                                    \
  _(INNER_L4, FRAG,             "Inner fragmented IP packet")                          \
  _(INNER_L4, SCTP,             "Inner SCTP (Stream Control Transmission Protocol) packet") \
  _(INNER_L4, ICMP,             "Inner ICMP packet")                                   \
  _(INNER_L4, NONFRAG,          "Inner non-fragmented IP packet")
/*
 * Alternative RX mbuf offload-flag table.  Besides the VLAN / RSS / FDIR /
 * checksum / IEEE1588 rows it carries the PKT_RX_IPV4_HDR* and
 * PKT_RX_IPV6_HDR* rows.
 * NOTE(review): presumably selected for DPDK releases older than 2.1; the
 * conditional separating this list from the other one is not visible here.
 *
 * Rows are _(PKT_RX_* flag, "description"); the expanding site defines _().
 */
#define foreach_dpdk_pkt_rx_offload_flag                                  \
  _(PKT_RX_VLAN_PKT,      "RX packet is a 802.1q VLAN packet")            \
  _(PKT_RX_RSS_HASH,      "RX packet with RSS hash result")               \
  _(PKT_RX_FDIR,          "RX packet with FDIR infos")                    \
  _(PKT_RX_L4_CKSUM_BAD,  "L4 cksum of RX pkt. is not OK")                \
  _(PKT_RX_IP_CKSUM_BAD,  "IP cksum of RX pkt. is not OK")                \
  _(PKT_RX_IPV4_HDR,      "RX packet with IPv4 header")                   \
  _(PKT_RX_IPV4_HDR_EXT,  "RX packet with extended IPv4 header")          \
  _(PKT_RX_IPV6_HDR,      "RX packet with IPv6 header")                   \
  _(PKT_RX_IPV6_HDR_EXT,  "RX packet with extended IPv6 header")          \
  _(PKT_RX_IEEE1588_PTP,  "RX IEEE1588 L2 Ethernet PT Packet")            \
  _(PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet")

/* Packet-type table is intentionally empty in this configuration. */
#define foreach_dpdk_pkt_type /* Dummy */
172 #endif /* RTE_VERSION */
/*
 * TX mbuf offload-flag table: rows are _(PKT_TX_* flag, "description").
 * The expanding site defines _() to consume the pair.
 */
#define foreach_dpdk_pkt_tx_offload_flag                                  \
  _(PKT_TX_VLAN_PKT,      "TX packet is a 802.1q VLAN packet")            \
  _(PKT_TX_IP_CKSUM,      "IP cksum of TX pkt. computed by NIC")          \
  _(PKT_TX_TCP_CKSUM,     "TCP cksum of TX pkt. computed by NIC")         \
  _(PKT_TX_SCTP_CKSUM,    "SCTP cksum of TX pkt. computed by NIC")        \
  _(PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
/*
 * Combined offload-flag table: every RX row followed by every TX row.
 * Expanded by format_dpdk_pkt_offload_flags.
 */
#define foreach_dpdk_pkt_offload_flag     \
  foreach_dpdk_pkt_rx_offload_flag        \
  foreach_dpdk_pkt_tx_offload_flag
/*
 * format_dpdk_pkt_types: format helper that appends a "Packet Types"
 * section to the string s.
 *
 * va_list argument: one u32 * pointing at the packet-type word.
 *
 * For every row of foreach_dpdk_pkt_type the row's L and F columns are
 * pasted into RTE_PTYPE_<L>_MASK and RTE_PTYPE_<L>_<F>; when the masked
 * value equals the constant, a line with the constant's name, its value
 * (0x%04x) and the row's description is appended, indented two columns
 * past the current indent of s.
 *
 * indent is tagged unused because foreach_dpdk_pkt_type can be defined
 * empty (the "Dummy" definition), in which case nothing references it.
 *
 * NOTE(review): several lines of this function (braces, the definition of
 * the row macro, the return) are not visible in this excerpt.
 */
185 static inline u8 * format_dpdk_pkt_types (u8 * s, va_list * va)
187 u32 *pkt_types = va_arg (*va, u32 *);
188 uword indent __attribute__((unused)) = format_get_indent (s) + 2;
/* Section heading; the matching rows follow on their own lines. */
193 s = format (s, "Packet Types");
196 if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \
198 s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
199 "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \
202 foreach_dpdk_pkt_type
/*
 * format_dpdk_pkt_offload_flags: format helper that appends a
 * "Packet Offload Flags" section to the string s.
 *
 * va_list argument: one pointer to the offload-flag word, read here
 * through a u16 *.
 *
 * Rows come from foreach_dpdk_pkt_offload_flag (RX rows, then TX rows).
 * Each emitted line shows the flag's name, its value (0x%04x) and the
 * description, indented two columns past the current indent of s.
 *
 * NOTE(review): format_dpdk_rte_mbuf passes &mb->ol_flags to this helper.
 * Confirm that field is 16 bits wide; if it is wider, only part of it is
 * examined through the u16 * used here.
 * NOTE(review): the per-row condition, braces and return are not visible
 * in this excerpt.
 */
209 static inline u8 * format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
211 u16 *ol_flags = va_arg (*va, u16 *);
212 uword indent = format_get_indent (s) + 2;
/* Section heading; one line per reported flag follows. */
217 s = format (s, "Packet Offload Flags");
222 s = format (s, "\n%U%s (0x%04x) %s", \
223 format_white_space, indent, #F, F, S); \
226 foreach_dpdk_pkt_offload_flag
/*
 * format_dpdk_rte_mbuf: format helper that appends a textual dump of one
 * struct rte_mbuf to the string s.
 *
 * va_list argument: one struct rte_mbuf *.
 *
 * Output: a "PKT MBUF:" line with port, nb_segs and pkt_len, then buf_len,
 * data_len and ol_flags, then packet_type, followed by the sections
 * produced by format_dpdk_pkt_offload_flags (given &mb->ol_flags) and
 * format_dpdk_pkt_types (given &mb->packet_type).
 *
 * NOTE(review): ol_flags is printed with "0x%x"; confirm the conversion
 * matches the width of mb->ol_flags.
 * NOTE(review): this format string has a trailing comma after
 * "ol_flags 0x%x", which format_dpdk_rx_rte_mbuf does not.
 * NOTE(review): some lines (braces, the packet_type argument, any
 * conditions around the two nested sections, the return) are not visible
 * in this excerpt.
 */
233 static inline u8 * format_dpdk_rte_mbuf (u8 * s, va_list * va)
235 struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *);
236 uword indent = format_get_indent (s) + 2;
238 s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
239 "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x,"
240 "\n%Upacket_type 0x%x",
241 mb->port, mb->nb_segs, mb->pkt_len,
242 format_white_space, indent,
243 mb->buf_len, mb->data_len, mb->ol_flags,
244 format_white_space, indent,
248 s = format (s, "\n%U%U", format_white_space, indent,
249 format_dpdk_pkt_offload_flags, &mb->ol_flags);
252 s = format (s, "\n%U%U", format_white_space, indent,
253 format_dpdk_pkt_types, &mb->packet_type);
257 #ifdef RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS
/*
 * Extended RX offload-flag table, compiled only when
 * RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS is defined.
 * Rows are _(PKT_EXT_RX_* flag, "description"); the expanding site
 * defines _().
 */
#define foreach_dpdk_pkt_ext_rx_offload_flag                        \
  _(PKT_EXT_RX_PKT_ERROR,     "RX Packet Error")                    \
  _(PKT_EXT_RX_BAD_FCS,       "RX Bad FCS checksum")                \
  _(PKT_EXT_RX_UDP,           "RX packet with UDP L4 header")       \
  _(PKT_EXT_RX_TCP,           "RX packet with TCP L4 header")       \
  _(PKT_EXT_RX_IPV4_FRAGMENT, "RX packet IPv4 Fragment")
/*
 * Combined RX table: the regular RX offload rows followed by the extended
 * RX rows.  Expanded by format_dpdk_pkt_rx_offload_flags.
 */
#define foreach_dpdk_pkt_ext_offload_flag     \
  foreach_dpdk_pkt_rx_offload_flag            \
  foreach_dpdk_pkt_ext_rx_offload_flag
/*
 * format_dpdk_pkt_rx_offload_flags: format helper that appends a
 * "Packet RX Offload Flags" section to the string s.
 *
 * va_list argument: one pointer to the offload-flag word, read here
 * through a u16 *.
 *
 * Rows come from foreach_dpdk_pkt_ext_offload_flag (regular RX rows, then
 * the extended RX rows).  Each emitted line shows the flag's name, its
 * value (0x%04x) and the description, indented two columns past the
 * current indent of s.
 *
 * NOTE(review): format_dpdk_rx_rte_mbuf passes &mb->ol_flags to this
 * helper.  Confirm that field is 16 bits wide; if it is wider, only part
 * of it is examined through the u16 * used here.
 * NOTE(review): the per-row condition, braces and return are not visible
 * in this excerpt.
 */
269 static inline u8 * format_dpdk_pkt_rx_offload_flags (u8 * s, va_list * va)
271 u16 *ol_flags = va_arg (*va, u16 *);
272 uword indent = format_get_indent (s) + 2;
/* Section heading; one line per reported flag follows. */
277 s = format (s, "Packet RX Offload Flags");
282 s = format (s, "\n%U%s (0x%04x) %s", \
283 format_white_space, indent, #F, F, S); \
286 foreach_dpdk_pkt_ext_offload_flag
/*
 * format_dpdk_rx_rte_mbuf: RX-side variant of the mbuf dump; appends a
 * textual description of one struct rte_mbuf to the string s.
 *
 * va_list argument: one struct rte_mbuf *.
 *
 * Output: a "PKT MBUF:" line with port, nb_segs and pkt_len, then buf_len,
 * data_len and ol_flags, then packet_type, followed by the sections
 * produced by format_dpdk_pkt_rx_offload_flags (given &mb->ol_flags) and
 * format_dpdk_pkt_types (given &mb->packet_type).
 *
 * NOTE(review): ol_flags is printed with "0x%x"; confirm the conversion
 * matches the width of mb->ol_flags.
 * NOTE(review): some lines (braces, comment delimiters, the packet_type
 * argument, any conditions around the two nested sections, the return)
 * are not visible in this excerpt.
 */
293 static inline u8 * format_dpdk_rx_rte_mbuf (u8 * s, va_list * va)
295 struct rte_mbuf * mb = va_arg (*va, struct rte_mbuf *);
296 uword indent = format_get_indent (s) + 2;
299 * Note: Assumes mb is head of pkt chain -- port, nb_segs, & pkt_len
300 * are only valid for the 1st mbuf segment.
302 s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
303 "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x"
304 "\n%Upacket_type 0x%x",
305 mb->port, mb->nb_segs, mb->pkt_len,
306 format_white_space, indent,
307 mb->buf_len, mb->data_len, mb->ol_flags,
308 format_white_space, indent,
312 s = format (s, "\n%U%U", format_white_space, indent,
313 format_dpdk_pkt_rx_offload_flags, &mb->ol_flags);
316 s = format (s, "\n%U%U", format_white_space, indent,
317 format_dpdk_pkt_types, &mb->packet_type);
320 #endif /* RTE_LIBRTE_MBUF_EXT_RX_OLFLAGS */
322 /* These args appear by themselves */
323 #define foreach_eal_double_hyphen_predicate_arg \
331 #define foreach_eal_single_hyphen_mandatory_arg \
335 #define foreach_eal_single_hyphen_arg \
337 _(mem-alloc-request, m) \
340 /* These args are preceded by "--" and followed by a single string */
341 #define foreach_eal_double_hyphen_arg \
/*
 * dpdk_rx_burst: pull received packets for one RX queue of device xd into
 * xd->rx_vectors[queue_id].
 *
 *   dm        main DPDK state; vhost_coalesce_frames is read from it
 *   xd        device to poll; dev_type selects the receive path
 *   queue_id  index of the queue to poll and of the rx_vectors slot to fill
 *
 * Paths by xd->dev_type:
 *   VNET_DPDK_DEV_ETH         rte_eth_rx_burst is called with room for
 *                             n_left more entries (n_left starts at
 *                             VLIB_FRAME_SIZE) and n_buffers accumulates
 *                             the chunk sizes.
 *   VNET_DPDK_DEV_VHOST_USER  one rte_vhost_dequeue_burst of up to
 *                             VLIB_FRAME_SIZE mbufs, followed by interrupt
 *                             coalescing checks for the two vrings.
 *   VNET_DPDK_DEV_KNI         (only with RTE_LIBRTE_KNI) rte_kni_rx_burst,
 *                             then rte_kni_handle_request.
 *
 * NOTE(review): the return-type line, local declarations, braces, loop
 * construct, early exits and the final return are not visible in this
 * excerpt; presumably n_buffers is the returned count -- confirm.
 */
349 dpdk_rx_burst ( dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
355 n_left = VLIB_FRAME_SIZE;
358 if (PREDICT_TRUE(xd->dev_type == VNET_DPDK_DEV_ETH))
/* Store the new chunk after the n_buffers mbufs already collected. */
362 n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id,
363 xd->rx_vectors[queue_id] + n_buffers, n_left);
364 n_buffers += n_this_chunk;
365 n_left -= n_this_chunk;
367 /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */
368 if (n_this_chunk < 32)
372 else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
374 vlib_main_t * vm = vlib_get_main();
375 vlib_buffer_main_t * bm = vm->buffer_main;
376 unsigned socket_id = rte_socket_id();
379 #if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
380 offset = queue_id * VIRTIO_QNUM;
382 struct vhost_virtqueue *vq =
383 xd->vu_vhost_dev.virtqueue[offset + VIRTIO_TXQ];
385 if (PREDICT_FALSE(!vq->enabled))
388 if (PREDICT_FALSE(!xd->vu_is_running))
/* Dequeue from the virtqueue at offset + VIRTIO_TXQ, allocating mbufs
   from the pool belonging to the calling thread's socket. */
392 n_buffers = rte_vhost_dequeue_burst(&xd->vu_vhost_dev, offset + VIRTIO_TXQ,
393 bm->pktmbuf_pools[socket_id],
394 xd->rx_vectors[queue_id], VLIB_FRAME_SIZE);
/* Timestamp compared against each vring's int_deadline below. */
396 f64 now = vlib_time_now (vm);
398 /* send pending interrupts if needed */
399 if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_TXQ)) {
400 dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
401 vring->n_since_last_int += n_buffers;
/* Interrupt when frames are pending past the deadline, or when the
   pending count exceeds dm->vhost_coalesce_frames. */
403 if ((vring->n_since_last_int && (vring->int_deadline < now))
404 || (vring->n_since_last_int > dm->vhost_coalesce_frames))
405 dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_TXQ);
408 if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) {
409 dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
410 if (vring->n_since_last_int && (vring->int_deadline < now))
411 dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ);
415 #ifdef RTE_LIBRTE_KNI
416 else if (xd->dev_type == VNET_DPDK_DEV_KNI)
418 n_buffers = rte_kni_rx_burst(xd->kni, xd->rx_vectors[queue_id], VLIB_FRAME_SIZE);
419 rte_kni_handle_request(xd->kni);
432 dpdk_update_counters (dpdk_device_t * xd, f64 now)
434 vlib_simple_counter_main_t * cm;
435 vnet_main_t * vnm = vnet_get_main();
436 u32 my_cpu = os_get_cpu_number();
437 u64 rxerrors, last_rxerrors;
440 /* only update counters for PMD interfaces */
441 if (xd->dev_type != VNET_DPDK_DEV_ETH)
445 * DAW-FIXME: VMXNET3 device stop/start doesn't work,
446 * therefore fake the stop in the dpdk driver by
447 * silently dropping all of the incoming pkts instead of
448 * stopping the driver / hardware.
450 if (xd->admin_up != 0xff)
452 xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
453 memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
454 rte_eth_stats_get (xd->device_index, &xd->stats);
456 /* maybe bump interface rx no buffer counter */
457 if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf))
459 cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
460 VNET_INTERFACE_COUNTER_RX_NO_BUF);
462 vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
463 xd->stats.rx_nombuf -
464 xd->last_stats.rx_nombuf);
467 /* missed pkt counter */
468 if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed))
470 cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
471 VNET_INTERFACE_COUNTER_RX_MISS);
473 vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
475 xd->last_stats.imissed);
477 #if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
478 rxerrors = xd->stats.ierrors;
479 last_rxerrors = xd->last_stats.ierrors;
481 rxerrors = xd->stats.ibadcrc
482 + xd->stats.ibadlen + xd->stats.ierrors;
483 last_rxerrors = xd->last_stats.ibadcrc
484 + xd->last_stats.ibadlen + xd->last_stats.ierrors;
487 if (PREDICT_FALSE (rxerrors != last_rxerrors))
489 cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
490 VNET_INTERFACE_COUNTER_RX_ERROR);
492 vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
493 rxerrors - last_rxerrors);
497 if ((len = rte_eth_xstats_get(xd->device_index, NULL, 0)) > 0)
499 vec_validate(xd->xstats, len - 1);
500 len = rte_eth_xstats_get(xd->device_index, xd->xstats, vec_len(xd->xstats));
501 ASSERT(vec_len(xd->xstats) == len);
502 _vec_len(xd->xstats) = len;