X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fdpdk%2Fdevice%2Fdevice.c;h=95678e9b0e28b879bab9d49b39e63eb6cae44916;hb=19ff0c369;hp=270abe19190bd5ecc16eb3918be63b25f8099b53;hpb=831f4200cab2b363f2a8ea340331343c14407b7d;p=vpp.git diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index 270abe19190..95678e9b0e2 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -42,6 +41,29 @@ static char *dpdk_tx_func_error_strings[] = { #undef _ }; +static clib_error_t * +dpdk_add_del_mac_address (vnet_hw_interface_t * hi, + const u8 * address, u8 is_add) +{ + int error; + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + + if (is_add) + error = rte_eth_dev_mac_addr_add (xd->port_id, + (struct rte_ether_addr *) address, 0); + else + error = rte_eth_dev_mac_addr_remove (xd->port_id, + (struct rte_ether_addr *) address); + + if (error) + { + return clib_error_return (0, "mac address add/del failed: %d", error); + } + + return NULL; +} + static clib_error_t * dpdk_set_mac_address (vnet_hw_interface_t * hi, const u8 * old_address, const u8 * address) @@ -50,8 +72,7 @@ dpdk_set_mac_address (vnet_hw_interface_t * hi, dpdk_main_t *dm = &dpdk_main; dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - error = rte_eth_dev_default_mac_addr_set (xd->port_id, - (struct ether_addr *) address); + error = rte_eth_dev_default_mac_addr_set (xd->port_id, (void *) address); if (error) { @@ -60,7 +81,7 @@ dpdk_set_mac_address (vnet_hw_interface_t * hi, else { vec_reset_length (xd->default_mac_address); - vec_add (xd->default_mac_address, address, sizeof (address)); + vec_add (xd->default_mac_address, address, sizeof (mac_address_t)); return NULL; } } @@ -137,46 +158,24 @@ static_always_inline struct rte_mbuf **mb, u32 n_left) { dpdk_main_t *dm = &dpdk_main; + dpdk_tx_queue_t *txq; u32 n_retry; int n_sent = 0; int queue_id; n_retry = 16; - queue_id = vm->thread_index; + queue_id = vm->thread_index % xd->tx_q_used; + txq = vec_elt_at_index (xd->tx_queues, queue_id); do { - /* - * This device only supports one TX queue, - * and we're running multi-threaded... - */ - if (PREDICT_FALSE (xd->lockp != 0)) - { - queue_id = queue_id % xd->tx_q_used; - while (clib_atomic_test_and_set (xd->lockp[queue_id])) - /* zzzz */ - queue_id = (queue_id + 1) % xd->tx_q_used; - } - -#if 0 - if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */ - { - /* no wrap, transmit in one burst */ - dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->thread_index]; + clib_spinlock_lock_if_init (&txq->lock); - ASSERT (hqos->swq != NULL); - - dpdk_hqos_metadata_set (hqos, mb, n_left); - n_sent = rte_ring_sp_enqueue_burst (hqos->swq, (void **) mb, - n_left, 0); - } - else -#endif if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD)) { /* no wrap, transmit in one burst */ n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left); + n_retry--; } else { @@ -184,8 +183,7 @@ static_always_inline n_sent = 0; } - if (PREDICT_FALSE (xd->lockp != 0)) - clib_atomic_release (xd->lockp[queue_id]); + clib_spinlock_unlock_if_init (&txq->lock); if (PREDICT_FALSE (n_sent < 0)) { @@ -198,8 +196,6 @@ static_always_inline xd->hw_if_index)->tx_node_index; vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1); - clib_warning ("rte_eth_tx_burst[%d]: error %d", - xd->port_id, n_sent); return n_left; // untransmitted packets } n_left -= n_sent; @@ -210,11 +206,11 @@ static_always_inline return n_left; } -static_always_inline void +static_always_inline __clib_unused void dpdk_prefetch_buffer (vlib_main_t * vm, struct rte_mbuf *mb) { vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - CLIB_PREFETCH (mb, 2 * CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (mb, sizeof (struct rte_mbuf), STORE); CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); } @@ -222,16 +218,20 @@ static_always_inline void dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b, struct rte_mbuf *mb) { - u32 ip_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM; - u32 tcp_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; - u32 udp_cksum = b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM; int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4; + u32 tso = b->flags & VNET_BUFFER_F_GSO, max_pkt_len; + u32 oflags, ip_cksum, tcp_cksum, udp_cksum; u64 ol_flags; /* Is there any work for us? */ - if (PREDICT_TRUE ((ip_cksum | tcp_cksum | udp_cksum) == 0)) + if (PREDICT_TRUE (((b->flags & VNET_BUFFER_F_OFFLOAD) | tso) == 0)) return; + oflags = vnet_buffer2 (b)->oflags; + ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM; + tcp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; + udp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; + mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data; mb->l3_len = vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset; @@ -241,6 +241,17 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b, ol_flags |= ip_cksum ? PKT_TX_IP_CKSUM : 0; ol_flags |= tcp_cksum ? PKT_TX_TCP_CKSUM : 0; ol_flags |= udp_cksum ? PKT_TX_UDP_CKSUM : 0; + + if (tso) + { + mb->l4_len = vnet_buffer2 (b)->gso_l4_hdr_sz; + mb->tso_segsz = vnet_buffer2 (b)->gso_size; + /* ensure packet is large enough to require tso */ + max_pkt_len = mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz; + if (mb->tso_segsz != 0 && mb->pkt_len > max_pkt_len) + ol_flags |= (tcp_cksum ? PKT_TX_TCP_SEG : PKT_TX_UDP_SEG); + } + mb->ol_flags |= ol_flags; /* we are trying to help compiler here by using local ol_flags with known @@ -281,6 +292,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, n_left = n_packets; mb = ptd->mbufs; +#if (CLIB_N_PREFETCHES >= 8) while (n_left >= 8) { u32 or_flags; @@ -319,10 +331,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, } if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && - (or_flags & - (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - | VNET_BUFFER_F_OFFLOAD_IP_CKSUM - | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))) + (or_flags & VNET_BUFFER_F_OFFLOAD))) { dpdk_buffer_tx_offload (xd, b[0], mb[0]); dpdk_buffer_tx_offload (xd, b[1], mb[1]); @@ -345,6 +354,59 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm, mb += 4; n_left -= 4; } +#elif (CLIB_N_PREFETCHES >= 4) + while (n_left >= 4) + { + vlib_buffer_t *b2, *b3; + u32 or_flags; + + CLIB_PREFETCH (mb[2], CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (mb[3], CLIB_CACHE_LINE_BYTES, STORE); + b2 = vlib_buffer_from_rte_mbuf (mb[2]); + CLIB_PREFETCH (b2, CLIB_CACHE_LINE_BYTES, LOAD); + b3 = vlib_buffer_from_rte_mbuf (mb[3]); + CLIB_PREFETCH (b3, CLIB_CACHE_LINE_BYTES, LOAD); + + b[0] = vlib_buffer_from_rte_mbuf (mb[0]); + b[1] = vlib_buffer_from_rte_mbuf (mb[1]); + + or_flags = b[0]->flags | b[1]->flags; + all_or_flags |= or_flags; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]); + + if (or_flags & VLIB_BUFFER_NEXT_PRESENT) + { + dpdk_validate_rte_mbuf (vm, b[0], 1); + dpdk_validate_rte_mbuf (vm, b[1], 1); + } + else + { + dpdk_validate_rte_mbuf (vm, b[0], 0); + dpdk_validate_rte_mbuf (vm, b[1], 0); + } + + if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) && + (or_flags & VNET_BUFFER_F_OFFLOAD))) + { + dpdk_buffer_tx_offload (xd, b[0], mb[0]); + dpdk_buffer_tx_offload (xd, b[1], mb[1]); + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, b[0]); + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + dpdk_tx_trace_buffer (dm, node, xd, queue_id, b[1]); + } + + mb += 2; + n_left -= 2; + } +#endif + while (n_left > 0) { b[0] = vlib_buffer_from_rte_mbuf (mb[0]); @@ -477,8 +539,8 @@ dpdk_subif_add_del_function (vnet_main_t * vnm, if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) goto done; - /* currently we program VLANS only for IXGBE VF and I40E VF */ - if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF)) + /* currently we program VLANS only for IXGBE VF */ + if (xd->pmd != VNET_DPDK_PMD_IXGBEVF) goto done; if (t->sub.eth.flags.no_tags == 1) @@ -522,6 +584,135 @@ done: return err; } +static clib_error_t * +dpdk_interface_set_rss_queues (struct vnet_main_t *vnm, + struct vnet_hw_interface_t *hi, + clib_bitmap_t * bitmap) +{ + dpdk_main_t *xm = &dpdk_main; + u32 hw_if_index = hi->hw_if_index; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + clib_error_t *err = 0; + struct rte_eth_rss_reta_entry64 *reta_conf = NULL; + struct rte_eth_dev_info dev_info; + u16 *reta = NULL; + u16 *valid_queue = NULL; + u16 valid_queue_count = 0; + uint32_t i, j; + uint32_t ret; + + rte_eth_dev_info_get (xd->port_id, &dev_info); + + /* parameter check */ + if (clib_bitmap_count_set_bits (bitmap) == 0) + { + err = clib_error_return (0, "must assign at least one valid rss queue"); + goto done; + } + + if (clib_bitmap_count_set_bits (bitmap) > dev_info.nb_rx_queues) + { + err = clib_error_return (0, "too many rss queues"); + goto done; + } + + /* new RETA */ + reta = clib_mem_alloc (dev_info.reta_size * sizeof (*reta)); + if (reta == NULL) + { + err = clib_error_return (0, "clib_mem_alloc failed"); + goto done; + } + + clib_memset (reta, 0, dev_info.reta_size * sizeof (*reta)); + + valid_queue_count = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap) { + if (i >= dev_info.nb_rx_queues) + { + err = clib_error_return (0, "illegal queue number"); + goto done; + } + reta[valid_queue_count++] = i; + } + /* *INDENT-ON* */ + + /* check valid_queue_count not zero, make coverity happy */ + if (valid_queue_count == 0) + { + err = clib_error_return (0, "must assign at least one valid rss queue"); + goto done; + } + + valid_queue = reta; + for (i = valid_queue_count, j = 0; i < dev_info.reta_size; i++, j++) + { + j = j % valid_queue_count; + reta[i] = valid_queue[j]; + } + + /* update reta table */ + reta_conf = + (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (dev_info.reta_size / + RTE_RETA_GROUP_SIZE * + sizeof (*reta_conf)); + if (reta_conf == NULL) + { + err = clib_error_return (0, "clib_mem_alloc failed"); + goto done; + } + + clib_memset (reta_conf, 0, + dev_info.reta_size / RTE_RETA_GROUP_SIZE * + sizeof (*reta_conf)); + + for (i = 0; i < dev_info.reta_size; i++) + { + uint32_t reta_id = i / RTE_RETA_GROUP_SIZE; + uint32_t reta_pos = i % RTE_RETA_GROUP_SIZE; + + reta_conf[reta_id].mask = UINT64_MAX; + reta_conf[reta_id].reta[reta_pos] = reta[i]; + } + + ret = + rte_eth_dev_rss_reta_update (xd->port_id, reta_conf, dev_info.reta_size); + if (ret) + { + err = clib_error_return (0, "rte_eth_dev_rss_reta_update err %d", ret); + goto done; + } + +done: + if (reta) + clib_mem_free (reta); + if (reta_conf) + clib_mem_free (reta_conf); + + return err; +} + +static clib_error_t * +dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid, + vnet_hw_if_rx_mode mode) +{ + dpdk_main_t *xm = &dpdk_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); + int rv = 0; + if (!(xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)) + return clib_error_return (0, "unsupported op (is the interface up?)", rv); + if (mode == VNET_HW_IF_RX_MODE_POLLING) + rv = rte_eth_dev_rx_intr_disable (xd->port_id, qid); + else + rv = rte_eth_dev_rx_intr_enable (xd->port_id, qid); + if (rv) + return clib_error_return (0, "dpdk_interface_rx_mode_change err %d", rv); + return 0; +} + /* *INDENT-OFF* */ VNET_DEVICE_CLASS (dpdk_device_class) = { .name = "dpdk", @@ -535,8 +726,11 @@ VNET_DEVICE_CLASS (dpdk_device_class) = { .subif_add_del_function = dpdk_subif_add_del_function, .rx_redirect_to_node = dpdk_set_interface_next_node, .mac_addr_change_function = dpdk_set_mac_address, + .mac_addr_add_del_function = dpdk_add_del_mac_address, .format_flow = format_dpdk_flow, .flow_ops_function = dpdk_flow_ops_fn, + .set_rss_queues_function = dpdk_interface_set_rss_queues, + .rx_mode_change_function = dpdk_interface_rx_mode_change, }; /* *INDENT-ON* */