#undef _
};
-static struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b)
+/*
+ * Hand `address` (reinterpreted as a struct ether_addr) to
+ * rte_eth_dev_default_mac_addr_set() for the DPDK port behind `hi`.
+ *
+ * Returns NULL when that call reports 0, otherwise a clib error carrying
+ * the code it returned.
+ */
+clib_error_t *
+dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address)
+{
+  dpdk_main_t * dm = &dpdk_main;
+  dpdk_device_t * dev = vec_elt_at_index (dm->devices, hi->dev_instance);
+  struct ether_addr * mac = (struct ether_addr *) address;
+  int rv;
+
+  rv = rte_eth_dev_default_mac_addr_set (dev->device_index, mac);
+  if (rv == 0)
+    return NULL;
+
+  return clib_error_return (0, "mac address set failed: %d", rv);
+}
+
+/*
+ * Hand the `naddr` entries of `mc_addr_vec` to
+ * rte_eth_dev_set_mc_addr_list() for the DPDK port behind `hi`.
+ *
+ * Returns NULL when that call reports 0, otherwise a clib error carrying
+ * the code it returned.
+ */
+clib_error_t *
+dpdk_set_mc_filter (vnet_hw_interface_t * hi,
+                    struct ether_addr mc_addr_vec[], int naddr)
+{
+  dpdk_main_t * dm = &dpdk_main;
+  dpdk_device_t * dev = vec_elt_at_index (dm->devices, hi->dev_instance);
+  int rv;
+
+  rv = rte_eth_dev_set_mc_addr_list (dev->device_index, mc_addr_vec, naddr);
+  if (rv == 0)
+    return NULL;
+
+  return clib_error_return (0, "mc addr list failed: %d", rv);
+}
+
+struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b)
{
vlib_main_t * vm = vlib_get_main();
vlib_buffer_main_t * bm = vm->buffer_main;
unsigned socket_id = rte_socket_id();
ASSERT (bm->pktmbuf_pools[socket_id]);
- pkt_mb = ((struct rte_mbuf *)b)-1;
+ pkt_mb = rte_mbuf_from_vlib_buffer(b);
nb_segs = pkt_mb->nb_segs;
for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
{
rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM;
ASSERT(copy_bytes <= pkt_mb->buf_len);
- memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);
+ clib_memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);
prev_mb_next = &new_mb->next;
pkt_mb = pkt_mb->next;
return first_mb;
}
-typedef struct {
- u32 buffer_index;
- u16 device_index;
- u8 queue_index;
- struct rte_mbuf mb;
- /* Copy of VLIB buffer; packet data stored in pre_data. */
- vlib_buffer_t buffer;
-} dpdk_tx_dma_trace_t;
+/*
+ * Build a zero-copy replica of the mbuf chain that backs vlib buffer `b`.
+ *
+ * The replica is produced by rte_pktmbuf_clone() using the packet pool of
+ * the calling thread's socket (bm->pktmbuf_pools[rte_socket_id()]).
+ *
+ * Returns the head of the cloned chain, or NULL when the source chain is
+ * shorter than its nb_segs field claims or when the clone cannot be
+ * allocated.  Nothing is left allocated on failure.
+ */
+struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b)
+{
+  vlib_main_t * vm = vlib_get_main();
+  vlib_buffer_main_t * bm = vm->buffer_main;
+  struct rte_mbuf * first_mb, * pkt_mb, * seg_mb;
+  u8 nb_segs, nb_segs_left;
+  unsigned socket_id = rte_socket_id();
+
+  ASSERT (bm->pktmbuf_pools[socket_id]);
+  pkt_mb = rte_mbuf_from_vlib_buffer(b);
+  nb_segs = pkt_mb->nb_segs;
+
+  /*
+   * Validate the source chain before allocating anything: it must hold
+   * exactly nb_segs segments.
+   */
+  seg_mb = pkt_mb;
+  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
+    {
+      if (PREDICT_FALSE(seg_mb == 0))
+        {
+          /* nb_segs_left segments were promised but are absent. */
+          clib_warning ("Missing %d mbuf chain segment(s): "
+                        "(nb_segs = %d, nb_segs_left = %d)!",
+                        nb_segs_left, nb_segs, nb_segs_left);
+          return NULL;
+        }
+      seg_mb = seg_mb->next;
+    }
+  ASSERT(seg_mb == 0);
+
+  /*
+   * rte_pktmbuf_clone() walks every segment of the chain it is given and
+   * returns a fully linked clone (freeing its partial work on failure).
+   * It must therefore be called once on the head: cloning per segment and
+   * re-linking ->next would orphan the tail of each earlier clone.
+   */
+  first_mb = rte_pktmbuf_clone(pkt_mb, bm->pktmbuf_pools[socket_id]);
+  if (PREDICT_FALSE(first_mb == 0))
+    return NULL;
+
+  /*
+   * Copy packet info into 1st segment.
+   */
+  rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
+  first_mb->nb_segs = pkt_mb->nb_segs;
+  first_mb->port = pkt_mb->port;
+#ifdef DAW_FIXME // TX Offload support TBD
+  first_mb->vlan_macip = pkt_mb->vlan_macip;
+  first_mb->hash = pkt_mb->hash;
+  first_mb->ol_flags = pkt_mb->ol_flags;
+#endif
+
+  __rte_mbuf_sanity_check(first_mb, 1);
+
+  return first_mb;
+}
static void
dpdk_tx_trace_buffer (dpdk_main_t * dm,
dpdk_tx_dma_trace_t * t0;
struct rte_mbuf * mb;
- mb = ((struct rte_mbuf *)buffer)-1;
+ mb = rte_mbuf_from_vlib_buffer(buffer);
t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
t0->queue_index = queue_id;
t0->device_index = xd->device_index;
t0->buffer_index = buffer_index;
- memcpy (&t0->mb, mb, sizeof (t0->mb));
- memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data));
- memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
+ clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+ clib_memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data));
+ clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
sizeof (t0->buffer.pre_data));
}
* This device only supports one TX queue,
* and we're running multi-threaded...
*/
- if (PREDICT_FALSE(xd->lockp != 0))
+ if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER &&
+ xd->lockp != 0))
{
queue_id = queue_id % xd->tx_q_used;
while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
{
u32 offset = 0;
+ if (xd->need_txlock) {
+ queue_id = 0;
+ while (__sync_lock_test_and_set (xd->lockp[queue_id], 1));
+ }
#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
- if (PREDICT_TRUE(xd->lockp == NULL)) {
+ else {
dpdk_device_and_queue_t * dq;
vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index])
{
}
assert (dq);
offset = dq->queue_id * VIRTIO_QNUM;
- } else {
- offset = queue_id * VIRTIO_QNUM;
}
#endif
if (PREDICT_TRUE(tx_head > tx_tail))
{
+ int i; u32 bytes = 0;
+ struct rte_mbuf **pkts = &tx_vector[tx_tail];
+ for (i = 0; i < (tx_head - tx_tail); i++) {
+ struct rte_mbuf *buff = pkts[i];
+ bytes += rte_pktmbuf_data_len(buff);
+ }
+
/* no wrap, transmit in one burst */
rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ,
&tx_vector[tx_tail],
(uint16_t) (tx_head-tx_tail));
if (PREDICT_TRUE(rv > 0))
{
+ dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
+ vring->packets += rv;
+ vring->bytes += bytes;
+
if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) {
- dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
+ vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
vring->n_since_last_int += rv;
f64 now = vlib_time_now (vm);
* so we can try to transmit the rest. If we didn't transmit
* everything, stop now.
*/
+ int i; u32 bytes = 0;
+ struct rte_mbuf **pkts = &tx_vector[tx_tail];
+ for (i = 0; i < (DPDK_TX_RING_SIZE - tx_tail); i++) {
+ struct rte_mbuf *buff = pkts[i];
+ bytes += rte_pktmbuf_data_len(buff);
+ }
rv = rte_vhost_enqueue_burst(&xd->vu_vhost_dev, offset + VIRTIO_RXQ,
&tx_vector[tx_tail],
(uint16_t) (DPDK_TX_RING_SIZE - tx_tail));
if (PREDICT_TRUE(rv > 0))
{
+ dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
+ vring->packets += rv;
+ vring->bytes += bytes;
+
if (dpdk_vhost_user_want_interrupt(xd, offset + VIRTIO_RXQ)) {
- dpdk_vu_vring *vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
+ vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
vring->n_since_last_int += rv;
f64 now = vlib_time_now (vm);
n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
}
+
+ if (xd->need_txlock)
+ *xd->lockp[queue_id] = 0;
}
#if RTE_LIBRTE_KNI
else if (xd->dev_type == VNET_DPDK_DEV_KNI)
rv = 0;
}
- if (PREDICT_FALSE(xd->lockp != 0))
+ if (PREDICT_FALSE(xd->dev_type != VNET_DPDK_DEV_VHOST_USER &&
+ xd->lockp != 0))
*xd->lockp[queue_id] = 0;
if (PREDICT_FALSE(rv < 0))
{
u32 bi0 = from[n_packets];
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- struct rte_mbuf *mb0 = ((struct rte_mbuf *)b0) - 1;
+ struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer(b0);
rte_pktmbuf_free (mb0);
}
return n_on_ring;
pref0 = vlib_get_buffer (vm, pi0);
pref1 = vlib_get_buffer (vm, pi1);
- prefmb0 = ((struct rte_mbuf *)pref0) - 1;
- prefmb1 = ((struct rte_mbuf *)pref1) - 1;
-
+ prefmb0 = rte_mbuf_from_vlib_buffer(pref0);
+ prefmb1 = rte_mbuf_from_vlib_buffer(pref1);
+
CLIB_PREFETCH(prefmb0, CLIB_CACHE_LINE_BYTES, LOAD);
CLIB_PREFETCH(pref0, CLIB_CACHE_LINE_BYTES, LOAD);
CLIB_PREFETCH(prefmb1, CLIB_CACHE_LINE_BYTES, LOAD);
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- mb0 = ((struct rte_mbuf *)b0) - 1;
- mb1 = ((struct rte_mbuf *)b1) - 1;
+ mb0 = rte_mbuf_from_vlib_buffer(b0);
+ mb1 = rte_mbuf_from_vlib_buffer(b1);
any_clone = b0->clone_count | b1->clone_count;
if (PREDICT_FALSE(any_clone != 0))
{
if (PREDICT_FALSE(b0->clone_count != 0))
- {
- struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE(mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
- }
+ {
+ struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
+ if (PREDICT_FALSE(mb0_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b0->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb0 = mb0_new;
+ vec_add1 (dm->recycle[my_cpu], bi0);
+ }
if (PREDICT_FALSE(b1->clone_count != 0))
- {
- struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1);
- if (PREDICT_FALSE(mb1_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b1->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb1 = mb1_new;
- vec_add1 (dm->recycle[my_cpu], bi1);
- }
- }
+ {
+ struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1);
+ if (PREDICT_FALSE(mb1_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b1->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb1 = mb1_new;
+ vec_add1 (dm->recycle[my_cpu], bi1);
+ }
+ }
delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
+ vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
delta1 = PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;
+ vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;
new_data_len0 = (u16)((i16) mb0->data_len + delta0);
new_data_len1 = (u16)((i16) mb1->data_len + delta1);
mb1->pkt_len = new_pkt_len1;
mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
+ mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
mb1->data_off = (PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data);
+ mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data);
if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
- {
+ {
if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
if (b1->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
- }
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
+ }
if (PREDICT_TRUE(any_clone == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
- tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
i++;
}
else
/* cloning was done, need to check for failure */
if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
}
if (PREDICT_TRUE((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
i++;
}
}
-
+
n_left -= 2;
}
while (n_left > 0)
b0 = vlib_get_buffer (vm, bi0);
- mb0 = ((struct rte_mbuf *)b0) - 1;
+ mb0 = rte_mbuf_from_vlib_buffer(b0);
if (PREDICT_FALSE(b0->clone_count != 0))
- {
- struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE(mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
- }
+ {
+ struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
+ if (PREDICT_FALSE(mb0_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b0->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb0 = mb0_new;
+ vec_add1 (dm->recycle[my_cpu], bi0);
+ }
delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
+ vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
new_data_len0 = (u16)((i16) mb0->data_len + delta0);
new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0);
mb0->data_len = new_data_len0;
mb0->pkt_len = new_pkt_len0;
mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
+ mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
}
n_left--;
vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, n_packets);
vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_packets);
+ n_packets);
while (n_packets--)
rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
}
/* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
+ ring->tx_head = 0;
+ ring->tx_tail = 0;
}
/* Recycle replicated buffers */
dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);
if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) {
- clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)",
- hi->sw_if_index);
- return 0;
+ clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)",
+ hi->sw_if_index);
+ return 0;
}
xd->vu_if_id = new_dev_instance;
return 0;
}
-static u8 * format_dpdk_device_name (u8 * s, va_list * args)
-{
- dpdk_main_t * dm = &dpdk_main;
- char *devname_format;
- char *device_name;
- u32 i = va_arg (*args, u32);
- struct rte_eth_dev_info dev_info;
- u8 * ret;
-
- if (dm->interface_name_format_decimal)
- devname_format = "%s%d/%d/%d";
- else
- devname_format = "%s%x/%x/%x";
-
-#ifdef RTE_LIBRTE_KNI
- if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) {
- return format(s, "kni%d", dm->devices[i].kni_port_id);
- } else
-#endif
- if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) {
- return format(s, "VirtualEthernet0/0/%d", dm->devices[i].vu_if_id);
- }
- switch (dm->devices[i].port_type)
- {
- case VNET_DPDK_PORT_TYPE_ETH_1G:
- device_name = "GigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_10G:
- device_name = "TenGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_40G:
- device_name = "FortyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
- device_name = "EthernetSwitch";
- break;
-
- #ifdef NETMAP
- case VNET_DPDK_PORT_TYPE_NETMAP:
- rte_eth_dev_info_get(i, &dev_info);
- return format(s, "netmap:%s", dev_info.driver_name);
- #endif
-
- case VNET_DPDK_PORT_TYPE_AF_PACKET:
- rte_eth_dev_info_get(i, &dev_info);
- return format(s, "af_packet%d", dm->devices[i].af_packet_port_id);
-
- default:
- case VNET_DPDK_PORT_TYPE_UNKNOWN:
- device_name = "UnknownEthernet";
- break;
- }
-
- rte_eth_dev_info_get(i, &dev_info);
- ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus,
- dev_info.pci_dev->addr.devid,
- dev_info.pci_dev->addr.function);
-
- /* address Chelsio cards which share PCI address */
- if (dm->devices[i].pmd == VNET_DPDK_PMD_CXGBE) {
- struct rte_eth_dev_info di;
-
- di.pci_dev = 0;
- rte_eth_dev_info_get(i+1, &di);
- if (di.pci_dev && memcmp(&dev_info.pci_dev->addr, &di.pci_dev->addr,
- sizeof(struct rte_pci_addr)) == 0)
- return format(ret, "/0");
-
- di.pci_dev = 0;
- rte_eth_dev_info_get(i-1, &di);
- if (di.pci_dev && memcmp(&dev_info.pci_dev->addr, &di.pci_dev->addr,
- sizeof(struct rte_pci_addr)) == 0)
- return format(ret, "/1");
- }
- return ret;
-}
-
-static u8 * format_dpdk_device_type (u8 * s, va_list * args)
-{
- dpdk_main_t * dm = &dpdk_main;
- char *dev_type;
- u32 i = va_arg (*args, u32);
-
- if (dm->devices[i].dev_type == VNET_DPDK_DEV_KNI) {
- return format(s, "Kernel NIC Interface");
- } else if (dm->devices[i].dev_type == VNET_DPDK_DEV_VHOST_USER) {
- return format(s, "vhost-user interface");
- }
-
- switch (dm->devices[i].pmd)
- {
- case VNET_DPDK_PMD_E1000EM:
- dev_type = "Intel 82540EM (e1000)";
- break;
-
- case VNET_DPDK_PMD_IGB:
- dev_type = "Intel e1000";
- break;
-
- case VNET_DPDK_PMD_I40E:
- dev_type = "Intel X710/XL710 Family";
- break;
-
- case VNET_DPDK_PMD_I40EVF:
- dev_type = "Intel X710/XL710 Family VF";
- break;
-
- case VNET_DPDK_PMD_FM10K:
- dev_type = "Intel FM10000 Family Ethernet Switch";
- break;
-
- case VNET_DPDK_PMD_IGBVF:
- dev_type = "Intel e1000 VF";
- break;
-
- case VNET_DPDK_PMD_VIRTIO:
- dev_type = "Red Hat Virtio";
- break;
-
- case VNET_DPDK_PMD_IXGBEVF:
- dev_type = "Intel 82599 VF";
- break;
-
- case VNET_DPDK_PMD_IXGBE:
- dev_type = "Intel 82599";
- break;
-
- case VNET_DPDK_PMD_VICE:
- case VNET_DPDK_PMD_ENIC:
- dev_type = "Cisco VIC";
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- dev_type = "Chelsio T4/T5";
- break;
-
- case VNET_DPDK_PMD_VMXNET3:
- dev_type = "VMware VMXNET3";
- break;
-
-#ifdef NETMAP
- case VNET_DPDK_PMD_NETMAP:
- dev_type = "Netmap/Vale";
- break;
-#endif
-
- case VNET_DPDK_PMD_AF_PACKET:
- dev_type = "af_packet";
- break;
-
- default:
- case VNET_DPDK_PMD_UNKNOWN:
- dev_type = "### UNKNOWN ###";
- break;
- }
-
- return format (s, dev_type);
-}
-
-static u8 * format_dpdk_link_status (u8 * s, va_list * args)
-{
- dpdk_device_t * xd = va_arg (*args, dpdk_device_t *);
- struct rte_eth_link * l = &xd->link;
- vnet_main_t * vnm = vnet_get_main();
- vnet_hw_interface_t * hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index);
-
- s = format (s, "%s ", l->link_status ? "up" : "down");
- if (l->link_status)
- {
- u32 promisc = rte_eth_promiscuous_get (xd->device_index);
-
- s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full" : "half");
- s = format (s, "speed %u mtu %d %s\n", l->link_speed,
- hi->max_packet_bytes, promisc ? " promisc" : "");
- }
- else
- s = format (s, "\n");
-
- return s;
-}
-
-#define _line_len 72
-#define _(v, str) \
-if (bitmap & v) { \
- if (format_get_indent (s) > next_split ) { \
- next_split += _line_len; \
- s = format(s,"\n%U", format_white_space, indent); \
- } \
- s = format(s, "%s ", str); \
-}
-
-static u8 * format_dpdk_rss_hf_name(u8 * s, va_list * args)
-{
- u64 bitmap = va_arg (*args, u64);
- int next_split = _line_len;
- int indent = format_get_indent (s);
-
- if (!bitmap)
- return format(s, "none");
-
- foreach_dpdk_rss_hf
-
- return s;
-}
-
-static u8 * format_dpdk_rx_offload_caps(u8 * s, va_list * args)
-{
- u32 bitmap = va_arg (*args, u32);
- int next_split = _line_len;
- int indent = format_get_indent (s);
-
- if (!bitmap)
- return format(s, "none");
-
- foreach_dpdk_rx_offload_caps
-
- return s;
-}
-
-static u8 * format_dpdk_tx_offload_caps(u8 * s, va_list * args)
-{
- u32 bitmap = va_arg (*args, u32);
- int next_split = _line_len;
- int indent = format_get_indent (s);
- if (!bitmap)
- return format(s, "none");
-
- foreach_dpdk_tx_offload_caps
-
- return s;
-}
-
-#undef _line_len
-#undef _
-
-static u8 * format_dpdk_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- int verbose = va_arg (*args, int);
- dpdk_main_t * dm = &dpdk_main;
- dpdk_device_t * xd = vec_elt_at_index (dm->devices, dev_instance);
- uword indent = format_get_indent (s);
- f64 now = vlib_time_now (dm->vlib_main);
-
- dpdk_update_counters (xd, now);
- dpdk_update_link_state (xd, now);
-
- s = format (s, "%U\n%Ucarrier %U",
- format_dpdk_device_type, xd->device_index,
- format_white_space, indent + 2,
- format_dpdk_link_status, xd);
-
- if (verbose > 1 && xd->dev_type == VNET_DPDK_DEV_ETH)
- {
- struct rte_eth_dev_info di;
- struct rte_pci_device * pci;
- struct rte_eth_rss_conf rss_conf;
- int vlan_off;
-
- rss_conf.rss_key = 0;
- rte_eth_dev_info_get(xd->device_index, &di);
- rte_eth_dev_rss_hash_conf_get(xd->device_index, &rss_conf);
- pci = di.pci_dev;
-
- if (pci)
- s = format(s, "%Upci id: device %04x:%04x subsystem %04x:%04x\n"
- "%Upci address: %04x:%02x:%02x.%02x\n",
- format_white_space, indent + 2,
- pci->id.vendor_id, pci->id.device_id,
- pci->id.subsystem_vendor_id,
- pci->id.subsystem_device_id,
- format_white_space, indent + 2,
- pci->addr.domain, pci->addr.bus,
- pci->addr.devid, pci->addr.function);
- s = format(s, "%Umax rx packet len: %d\n",
- format_white_space, indent + 2, di.max_rx_pktlen);
- s = format(s, "%Upromiscuous: unicast %s all-multicast %s\n",
- format_white_space, indent + 2,
- rte_eth_promiscuous_get(xd->device_index) ? "on" : "off",
- rte_eth_promiscuous_get(xd->device_index) ? "on" : "off");
- vlan_off = rte_eth_dev_get_vlan_offload(xd->device_index);
- s = format(s, "%Uvlan offload: strip %s filter %s qinq %s\n",
- format_white_space, indent + 2,
- vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
- s = format(s, "%Uqueue size (max): rx %d (%d) tx %d (%d)\n",
- format_white_space, indent + 2,
- xd->rx_q_used, di.max_rx_queues,
- xd->tx_q_used, di.max_tx_queues);
- s = format(s, "%Urx offload caps: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, di.rx_offload_capa);
- s = format(s, "%Utx offload caps: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, di.tx_offload_capa);
- s = format(s, "%Urss active: %U\n"
- "%Urss supported: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, rss_conf.rss_hf,
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, di.flow_type_rss_offloads);
- }
-
- if (xd->cpu_socket > -1)
- s = format (s, "%Ucpu socket %d",
- format_white_space, indent + 2,
- xd->cpu_socket);
-
- /* $$$ MIB counters */
-
- {
-#define _(N, V) \
- if (xd->stats.V != 0) \
- s = format (s, "\n%U%-40U%16Ld", \
- format_white_space, indent + 2, \
- format_c_identifier, #N, xd->stats.V);
-
- foreach_dpdk_counter
-#undef _
- }
-
- u8 * xs = 0;
- struct rte_eth_xstats * xstat;
-
- vec_foreach(xstat, xd->xstats)
- {
- if (xstat->value)
- {
- /* format_c_identifier don't like c strings inside vector */
- u8 * name = format(0,"%s", xstat->name);
- xs = format(xs, "\n%U%-38U%16Ld",
- format_white_space, indent + 4,
- format_c_identifier, name, xstat->value);
- vec_free(name);
- }
- }
-
- if (xs)
- {
- s = format(s, "\n%Uextended stats:%v",
- format_white_space, indent + 2, xs);
- vec_free(xs);
- }
-
- return s;
-}
-
-static u8 * format_dpdk_tx_dma_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main();
- dpdk_tx_dma_trace_t * t = va_arg (*va, dpdk_tx_dma_trace_t *);
- dpdk_main_t * dm = &dpdk_main;
- dpdk_device_t * xd = vec_elt_at_index (dm->devices, t->device_index);
- uword indent = format_get_indent (s);
- vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
-
- s = format (s, "%U tx queue %d",
- format_vnet_sw_interface_name, vnm, sw,
- t->queue_index);
-
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index,
- format_vlib_buffer, &t->buffer);
-
- s = format (s, "\n%U%U", format_white_space, indent,
- format_ethernet_header_with_length, t->buffer.pre_data,
- sizeof (t->buffer.pre_data));
-
- return s;
-}
-
static void dpdk_clear_hw_interface_counters (u32 instance)
{
dpdk_main_t * dm = &dpdk_main;
*/
if (xd->admin_up != 0xff)
{
- rte_eth_stats_reset (xd->device_index);
- memset (&xd->last_stats, 0, sizeof (xd->last_stats));
+ /*
+ * Set the "last_cleared_stats" to the current stats, so that
+ * things appear to clear from a display perspective.
+ */
dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
+
+ clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof(xd->stats));
+ clib_memcpy (xd->last_cleared_xstats, xd->xstats,
+ vec_len(xd->last_cleared_xstats) *
+ sizeof(xd->last_cleared_xstats[0]));
}
else
{
- rte_eth_stats_reset (xd->device_index);
- memset(&xd->stats, 0, sizeof(xd->stats));
+ /*
+ * Internally rte_eth_xstats_reset() is calling rte_eth_stats_reset(),
+ * so we're only calling xstats_reset() here.
+ */
+ rte_eth_xstats_reset (xd->device_index);
+ memset (&xd->stats, 0, sizeof(xd->stats));
memset (&xd->last_stats, 0, sizeof (xd->last_stats));
}
- rte_eth_xstats_reset(xd->device_index);
+
+ if (PREDICT_FALSE(xd->dev_type == VNET_DPDK_DEV_VHOST_USER)) {
+ int i;
+ for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) {
+ xd->vu_intf->vrings[i].packets = 0;
+ xd->vu_intf->vrings[i].bytes = 0;
+ }
+ }
}
#ifdef RTE_LIBRTE_KNI
vlib_buffer_main_t * bm = vm->buffer_main;
memset(&conf, 0, sizeof(conf));
snprintf(conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id);
- conf.mbuf_size = MBUF_SIZE;
+ conf.mbuf_size = VLIB_BUFFER_DATA_SIZE;
memset(&ops, 0, sizeof(ops));
ops.port_id = xd->kni_port_id;
ops.change_mtu = kni_change_mtu;
}
else
{
+ /*
+ * DAW-FIXME: VMXNET3 device stop/start doesn't work,
+ * therefore fake the stop in the dpdk driver by
+ * silently dropping all of the incoming pkts instead of
+ * stopping the driver / hardware.
+ */
+ if (xd->pmd != VNET_DPDK_PMD_VMXNET3)
+ xd->admin_up = 0;
+ else
+ xd->admin_up = ~0;
+
rte_eth_allmulticast_disable (xd->device_index);
vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
* stopping the driver / hardware.
*/
if (xd->pmd != VNET_DPDK_PMD_VMXNET3)
- {
rte_eth_dev_stop (xd->device_index);
- xd->admin_up = 0;
- }
- else
- xd->admin_up = ~0;
}
if (rv < 0)
if (xd->dev_type != VNET_DPDK_DEV_ETH)
return 0;
- /* currently we program VLANS only for IXGBE VF */
- if (xd->pmd != VNET_DPDK_PMD_IXGBEVF)
+
+ /* currently we program VLANS only for IXGBE VF and I40E VF */
+ if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) &&
+ (xd->pmd != VNET_DPDK_PMD_I40EVF))
return 0;
if (t->sub.eth.flags.no_tags == 1)
.name_renumber = dpdk_device_renumber,
};
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class,
+ dpdk_interface_tx)
+
void dpdk_set_flowcontrol_callback (vlib_main_t *vm,
dpdk_flowcontrol_callback_t callback)
{
clib_error_t * error = 0;
uword event_type;
uword *event_data = 0;
- u32 index;
u32 sw_if_index;
u32 flags;
dpdk_main.admin_up_down_in_progress = 1;
- for (index=0; index<vec_len(event_data); index++)
+ switch (event_type) {
+ case UP_DOWN_FLAG_EVENT:
{
- sw_if_index = event_data[index] >> 32;
- flags = (u32) event_data[index];
-
- switch (event_type) {
- case UP_DOWN_FLAG_EVENT:
- error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags);
- clib_error_report(error);
- break;
- }
+ if (vec_len(event_data) == 2) {
+ sw_if_index = event_data[0];
+ flags = event_data[1];
+ error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags);
+ clib_error_report(error);
+ }
}
+ break;
+ }
vec_reset_length (event_data);
*/
void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags)
{
- vlib_process_signal_event
+ /*
+  * Queue a two-element UP_DOWN_FLAG_EVENT: d[0] = sw_if_index,
+  * d[1] = flags.  The payload is written through a uword pointer, so
+  * each element must be requested as sizeof(uword) bytes; asking for
+  * sizeof(u32) would make the two stores below overrun the buffer on
+  * 64-bit targets.
+  */
+ uword * d = vlib_process_signal_event_data
(vm, admin_up_down_process_node.index,
- UP_DOWN_FLAG_EVENT,
- (((uword)sw_if_index << 32) | flags));
+ UP_DOWN_FLAG_EVENT, 2, sizeof(uword));
+ d[0] = sw_if_index;
+ d[1] = flags;
}
/*
}
return 0; // no override
}
+
+/*
+ * Refresh the counters of the DPDK device behind hardware interface
+ * `hw_if_index` and copy its stats structure into `dest`.
+ *
+ * Returns 0 on success, or a clib error when `dest` is NULL or the device
+ * lookup produced a NULL pointer.
+ */
+clib_error_t*
+dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest)
+{
+  dpdk_main_t * dm = &dpdk_main;
+  vnet_main_t * vnm = vnet_get_main();
+  vnet_hw_interface_t * hw = vnet_get_hw_interface (vnm, hw_if_index);
+  dpdk_device_t * dev = vec_elt_at_index (dm->devices, hw->dev_instance);
+
+  if (dest == 0)
+    return clib_error_return (0, "Missing or NULL argument");
+
+  if (dev == 0)
+    return clib_error_return (0, "Unable to get DPDK device from HW interface");
+
+  /* Bring dev->stats up to date before handing out the copy. */
+  dpdk_update_counters (dev, vlib_time_now (dm->vlib_main));
+  clib_memcpy(dest, &dev->stats, sizeof(dev->stats));
+
+  return (0);
+}
+
+/*
+ * Accessor for dpdk_main's num_mbufs field.
+ */
+u32 dpdk_num_mbufs (void)
+{
+  return dpdk_main.num_mbufs;
+}
+
+/*
+ * Accessor for dpdk_main's io_thread_release field.
+ */
+int dpdk_io_thread_release (void)
+{
+  return dpdk_main.io_thread_release;
+}
+
+/*
+ * Look up the DPDK device for hardware interface `hi` (by its
+ * dev_instance) and return that device's pmd field.
+ * `hi` and the resulting device pointer are checked with assert().
+ */
+dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi)
+{
+  dpdk_device_t * dev;
+
+  assert (hi);
+  dev = vec_elt_at_index (dpdk_main.devices, hi->dev_instance);
+  assert (dev);
+
+  return dev->pmd;
+}
+
+/*
+ * Look up the DPDK device for hardware interface `hi` (by its
+ * dev_instance) and return that device's cpu_socket field.
+ * `hi` and the resulting device pointer are checked with assert().
+ */
+i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi)
+{
+  dpdk_device_t * dev;
+
+  assert (hi);
+  dev = vec_elt_at_index (dpdk_main.devices, hi->dev_instance);
+  assert (dev);
+
+  return dev->cpu_socket;
+}