dpdk: code preparation for bumping to DPDK 22.11
[vpp.git] / src / plugins / dpdk / device / common.c
index 89046d1..e4a79e8 100644 (file)
 #include <dpdk/device/dpdk_priv.h>
 #include <vppinfra/error.h>
 
+/* DPDK TX offload to vnet hw interface caps mapppings */
+static struct
+{
+  u64 offload;
+  vnet_hw_if_caps_t caps;
+} tx_off_caps_map[] = {
+  { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM },
+  { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM },
+  { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM },
+  { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM },
+  { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM },
+  { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO },
+  { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO }
+};
+
 void
 dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
 {
@@ -41,14 +56,16 @@ dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
 void
 dpdk_device_setup (dpdk_device_t * xd)
 {
-  dpdk_main_t *dm = &dpdk_main;
   vlib_main_t *vm = vlib_get_main ();
   vnet_main_t *vnm = vnet_get_main ();
-  vlib_thread_main_t *tm = vlib_get_thread_main ();
   vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+  u16 buf_sz = vlib_buffer_get_default_data_size (vm);
+  vnet_hw_if_caps_change_t caps = {};
   struct rte_eth_dev_info dev_info;
-  u64 bitmap;
+  struct rte_eth_conf conf = {};
+  u64 rxo, txo;
+  u32 max_frame_size;
   int rv;
   int j;
 
@@ -59,70 +76,153 @@ dpdk_device_setup (dpdk_device_t * xd)
 
   if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
     {
-      vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+      vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
       dpdk_device_stop (xd);
     }
 
-  /* Enable flow director when flows exist */
-  if (xd->pmd == VNET_DPDK_PMD_I40E)
+  rte_eth_dev_info_get (xd->port_id, &dev_info);
+
+  dpdk_log_debug ("[%u] configuring device %U", xd->port_id,
+                 format_dpdk_rte_device, dev_info.device);
+
+  /* create rx and tx offload wishlist */
+  rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+  txo = 0;
+
+  if (xd->conf.enable_tcp_udp_checksum)
+    rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+
+  if (xd->conf.disable_tx_checksum_offload == 0 &&
+      xd->conf.enable_outer_checksum_offload)
+    txo |=
+      RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
+  if (xd->conf.disable_tx_checksum_offload == 0)
+    txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+          RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+
+  if (xd->conf.disable_multi_seg == 0)
     {
-      if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0)
-       xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
-      else
-       xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE;
+      txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+      rxo |= RTE_ETH_RX_OFFLOAD_SCATTER;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+      rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+#endif
     }
 
-  rte_eth_dev_info_get (xd->port_id, &dev_info);
-
-  bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa;
-  if (bitmap)
+  if (xd->conf.enable_lro)
+    rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
+
+  /* per-device offload config */
+  if (xd->conf.enable_tso)
+    txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO |
+          RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+
+  if (xd->conf.disable_rx_scatter)
+    rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER;
+
+  /* mask unsupported offloads */
+  rxo &= dev_info.rx_offload_capa;
+  txo &= dev_info.tx_offload_capa;
+
+  dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id,
+                 format_dpdk_rx_offload_caps, dev_info.rx_offload_capa);
+  dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id,
+                 format_dpdk_rx_offload_caps, rxo);
+  dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id,
+                 format_dpdk_tx_offload_caps, dev_info.tx_offload_capa);
+  dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id,
+                 format_dpdk_tx_offload_caps, txo);
+
+  /* finalize configuration */
+  conf.rxmode.offloads = rxo;
+  conf.txmode.offloads = txo;
+  if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+    conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size;
+
+  if (xd->conf.enable_lsc_int)
+    conf.intr_conf.lsc = 1;
+  if (xd->conf.enable_rxq_int)
+    conf.intr_conf.rxq = 1;
+
+  conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
+  if (xd->conf.n_rx_queues > 1)
     {
-      dpdk_log_warn ("unsupported tx offloads requested on port %u: %U",
-                    xd->port_id, format_dpdk_tx_offload_caps, bitmap);
-      xd->port_conf.txmode.offloads ^= bitmap;
+      if (xd->conf.disable_rss == 0)
+       {
+         conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
+         conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf;
+       }
     }
 
-  bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa;
-  if (bitmap)
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+  if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME)
+    {
+      conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+      xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+      mtu = xd->max_supported_frame_size - xd->driver_frame_overhead;
+    }
+  else
     {
-      dpdk_log_warn ("unsupported rx offloads requested on port %u: %U",
-                    xd->port_id, format_dpdk_rx_offload_caps, bitmap);
-      xd->port_conf.rxmode.offloads ^= bitmap;
+      xd->max_supported_frame_size =
+       clib_min (1500 + xd->driver_frame_overhead, buf_sz);
     }
+#else
+  if (xd->conf.disable_multi_seg)
+    xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz);
+  else
+    xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+#endif
+
+  max_frame_size = clib_min (xd->max_supported_frame_size,
+                            ethernet_main.default_mtu + hi->frame_overhead);
 
-  rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used,
-                             xd->tx_q_used, &xd->port_conf);
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+  conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead;
+#endif
 
-  if (rv < 0)
+retry:
+  rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues,
+                             xd->conf.n_tx_queues, &conf);
+  if (rv < 0 && conf.intr_conf.rxq)
     {
-      dpdk_device_error (xd, "rte_eth_dev_configure", rv);
-      goto error;
+      conf.intr_conf.rxq = 0;
+      goto retry;
     }
 
-  vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1,
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+  rte_eth_dev_set_mtu (xd->port_id,
+                      max_frame_size - xd->driver_frame_overhead);
+#endif
+
+  hi->max_frame_size = 0;
+  vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size);
+  dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u "
+                 "driver_frame_overhead %u",
+                 xd->port_id, hi->max_frame_size,
+                 xd->max_supported_frame_size, xd->driver_frame_overhead);
+
+  vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
                        CLIB_CACHE_LINE_BYTES);
-  for (j = 0; j < xd->tx_q_used; j++)
+  for (j = 0; j < xd->conf.n_tx_queues; j++)
     {
-      rv =
-       rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc,
-                               xd->cpu_socket, &xd->tx_conf);
+      rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+                                  xd->cpu_socket, 0);
 
       /* retry with any other CPU socket */
       if (rv < 0)
-       rv =
-         rte_eth_tx_queue_setup (xd->port_id, j,
-                                 xd->nb_tx_desc, SOCKET_ID_ANY,
-                                 &xd->tx_conf);
+       rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+                                    SOCKET_ID_ANY, 0);
       if (rv < 0)
        dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
 
-      if (xd->tx_q_used < tm->n_vlib_mains)
-       clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
+      clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
     }
 
-  vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+  vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
                        CLIB_CACHE_LINE_BYTES);
-  for (j = 0; j < xd->rx_q_used; j++)
+
+  for (j = 0; j < xd->conf.n_rx_queues; j++)
     {
       dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
       u8 bpidx = vlib_buffer_pool_get_default_for_numa (
@@ -130,12 +230,12 @@ dpdk_device_setup (dpdk_device_t * xd)
       vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
       struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
 
-      rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+      rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
                                   xd->cpu_socket, 0, mp);
 
       /* retry with any other CPU socket */
       if (rv < 0)
-       rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+       rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
                                     SOCKET_ID_ANY, 0, mp);
 
       rxq->buffer_pool_index = bp->index;
@@ -147,7 +247,40 @@ dpdk_device_setup (dpdk_device_t * xd)
   if (vec_len (xd->errors))
     goto error;
 
-  rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes);
+  xd->buffer_flags =
+    (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID);
+
+  if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) ==
+      (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
+    xd->buffer_flags |=
+      (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+  dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM,
+                       rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
+  dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG,
+                       rxo & RTE_ETH_RX_OFFLOAD_SCATTER);
+  dpdk_device_flag_set (
+    xd, DPDK_DEVICE_FLAG_TX_OFFLOAD,
+    (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) ==
+      (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM));
+
+  /* unconditionally set mac filtering cap */
+  caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER;
+
+  ethernet_set_flags (vnm, xd->hw_if_index,
+                     ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+  for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
+    {
+      __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i;
+      caps.mask |= v->caps;
+      if ((v->offload & txo) == v->offload)
+       caps.val |= v->caps;
+    }
+
+  vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);
+  xd->enabled_rx_off = rxo;
+  xd->enabled_tx_off = txo;
 
   if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
     dpdk_device_start (xd);
@@ -187,10 +320,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+  int int_mode = 0;
   if (!hi)
     return;
 
-  if (!xd->port_conf.intr_conf.rxq)
+  if (!xd->conf.enable_rxq_int)
     return;
 
   /* Probe for interrupt support */
@@ -210,8 +344,8 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
 
   if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)
     {
-      hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
-      for (int q = 0; q < xd->rx_q_used; q++)
+      int_mode = 1;
+      for (int q = 0; q < xd->conf.n_rx_queues; q++)
        {
          dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
          clib_file_t f = { 0 };
@@ -219,7 +353,7 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
          if (rxq->efd < 0)
            {
              xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED;
-             hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+             int_mode = 0;
              break;
            }
          f.read_function = dpdk_rx_read_ready;
@@ -240,6 +374,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
            }
        }
     }
+
+  if (int_mode)
+    vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+  else
+    vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
   vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index);
 }
 
@@ -259,6 +398,11 @@ dpdk_device_start (dpdk_device_t * xd)
       return;
     }
 
+  dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id,
+                 format_dpdk_burst_fn, xd, VLIB_RX);
+  dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id,
+                 format_dpdk_burst_fn, xd, VLIB_TX);
+
   dpdk_setup_interrupts (xd);
 
   if (xd->default_mac_address)
@@ -311,10 +455,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id,
   rte_eth_link_get_nowait (port_id, &link);
   u8 link_up = link.link_status;
   if (link_up)
-    dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s",
-                  port_id, (unsigned) link.link_speed,
-                  (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
-                  "full-duplex" : "half-duplex");
+    dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id,
+                  (unsigned) link.link_speed,
+                  (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+                          "full-duplex" :
+                          "half-duplex");
   else
     dpdk_log_info ("Port %d Link Down\n\n", port_id);
 
@@ -337,7 +482,11 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
   const struct rte_bus *bus;
 
   bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+  if (bus && !strcmp (rte_bus_name (bus), "pci"))
+#else
   if (bus && !strcmp (bus->name, "pci"))
+#endif
     return RTE_DEV_TO_PCI (info->device);
   else
     return NULL;
@@ -350,7 +499,11 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
   const struct rte_bus *bus;
 
   bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+  if (bus && !strcmp (rte_bus_name (bus), "vmbus"))
+#else
   if (bus && !strcmp (bus->name, "vmbus"))
+#endif
     return container_of (info->device, struct rte_vmbus_device, device);
   else
     return NULL;