iavf: new driver using new dev infra 89/39689/22
author Damjan Marion <damarion@cisco.com>
Tue, 17 Oct 2023 16:08:18 +0000 (16:08 +0000)
committer Damjan Marion <damarion@cisco.com>
Thu, 2 Nov 2023 16:08:57 +0000 (16:08 +0000)
Type: feature
Change-Id: I9ae0dbf28b4571a37c568b587b771f90c06f200d
Signed-off-by: Damjan Marion <damarion@cisco.com>
18 files changed:
MAINTAINERS
docs/spelling_wordlist.txt
src/plugins/dev_iavf/CMakeLists.txt [new file with mode: 0644]
src/plugins/dev_iavf/adminq.c [new file with mode: 0644]
src/plugins/dev_iavf/counters.c [new file with mode: 0644]
src/plugins/dev_iavf/format.c [new file with mode: 0644]
src/plugins/dev_iavf/iavf.c [new file with mode: 0644]
src/plugins/dev_iavf/iavf.h [new file with mode: 0644]
src/plugins/dev_iavf/iavf_desc.h [new file with mode: 0644]
src/plugins/dev_iavf/iavf_regs.h [new file with mode: 0644]
src/plugins/dev_iavf/port.c [new file with mode: 0644]
src/plugins/dev_iavf/queue.c [new file with mode: 0644]
src/plugins/dev_iavf/rx_node.c [new file with mode: 0644]
src/plugins/dev_iavf/tx_node.c [new file with mode: 0644]
src/plugins/dev_iavf/virtchnl.c [new file with mode: 0644]
src/plugins/dev_iavf/virtchnl.h [new file with mode: 0644]
src/plugins/dev_iavf/virtchnl_funcs.h [new file with mode: 0644]
src/vppinfra/ring.h

diff --git a/MAINTAINERS b/MAINTAINERS
index fc7be1a..2abc3d7 100644 (file)
@@ -410,6 +410,11 @@ I: avf
 M:     Damjan Marion <damarion@cisco.com>
 F:     src/plugins/avf/
 
+Plugin - IAVF Device driver
+I:     iavf
+M:     Damjan Marion <damarion@cisco.com>
+F:     src/plugins/dev_iavf/
+
 Plugin - Dispatch Trace PCAP
 I:     dispatch-trace
 M:     Dave Barach <vpp@barachs.net>
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index e1dbef2..7fec295 100644 (file)
@@ -248,6 +248,7 @@ Dest
 det
 dev
 devbind
+dev_iavf
 df
 dhcp
 dhcp
@@ -465,6 +466,7 @@ ia
 iacl
 iAcl
 iACL
+iavf
 iBGP
 ibverb
 IBverbs
diff --git a/src/plugins/dev_iavf/CMakeLists.txt b/src/plugins/dev_iavf/CMakeLists.txt
new file mode 100644 (file)
index 0000000..8fa89b7
--- /dev/null
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_iavf
+  SOURCES
+  adminq.c
+  counters.c
+  format.c
+  iavf.c
+  port.c
+  queue.c
+  rx_node.c
+  tx_node.c
+  virtchnl.c
+
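+  # listed again here so they are also built once per supported CPU
+  # microarchitecture variant, with the best match selected at runtime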
+  MULTIARCH_SOURCES
+  rx_node.c
+  tx_node.c
+)
+
diff --git a/src/plugins/dev_iavf/adminq.c b/src/plugins/dev_iavf/adminq.c
new file mode 100644 (file)
index 0000000..982a9a9
--- /dev/null
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <ctype.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define IIAVF_AQ_LARGE_BUF 512
+#define IIAVF_AQ_ATQ_LEN   4
+#define IIAVF_AQ_ARQ_LEN   16
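+
+/* Ring geometry: the ATQ (driver-to-PF) holds 4 descriptors, the ARQ
+ * (PF-to-driver) 16; each slot owns a fixed IIAVF_AQ_BUF_SIZE buffer
+ * inside the single iavf_adminq_dma_mem_t DMA block below. */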
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "adminq",
+};
+
+struct iavf_adminq_dma_mem
+{
+  iavf_aq_desc_t atq[IIAVF_AQ_ATQ_LEN];
+  iavf_aq_desc_t arq[IIAVF_AQ_ARQ_LEN];
+  struct
+  {
+    u8 data[IIAVF_AQ_BUF_SIZE];
+  } atq_bufs[IIAVF_AQ_ATQ_LEN];
+  struct
+  {
+    u8 data[IIAVF_AQ_BUF_SIZE];
+  } arq_bufs[IIAVF_AQ_ARQ_LEN];
+};
+
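+/* The device DMA-writes the DD (descriptor done) flag last, so loading it
+ * with acquire semantics makes the rest of the descriptor safe to read. */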
+static_always_inline int
+iavf_aq_desc_is_done (iavf_aq_desc_t *d)
+{
+  iavf_aq_desc_flags_t flags;
+  flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+  return flags.dd;
+}
+
+static u8 *
+format_iavf_aq_desc_flags (u8 *s, va_list *args)
+{
+  iavf_aq_desc_flags_t f = va_arg (*args, iavf_aq_desc_flags_t);
+  int i = 0;
+
+#define _(n, v)                                                               \
+  if (f.v)                                                                    \
+    {                                                                         \
+      char str[] = #v, *sp = str;                                             \
+      if (i++)                                                                \
+       {                                                                     \
+         vec_add1 (s, ',');                                                  \
+         vec_add1 (s, ' ');                                                  \
+       }                                                                     \
+      while (sp[0])                                                           \
+       vec_add1 (s, (u8) toupper (sp++[0]));                                 \
+    }
+  foreach_iavf_aq_desc_flag
+#undef _
+    return s;
+}
+
+static u8 *
+format_iavf_aq_desc_retval (u8 *s, va_list *args)
+{
+  iavf_aq_desc_retval_t rv = va_arg (*args, u32);
+
+  char *retvals[] = {
+#define _(a, b) [a] = #b,
+    foreach_iavf_aq_desc_retval
+#undef _
+  };
+
+  if (rv >= ARRAY_LEN (retvals) || retvals[rv] == 0)
+    return format (s, "UNKNOWN(%d)", rv);
+
+  return format (s, "%s", retvals[rv]);
+}
+
+static u8 *
+format_iavf_aq_desc (u8 *s, va_list *args)
+{
+  iavf_aq_desc_t *d = va_arg (*args, iavf_aq_desc_t *);
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "opcode 0x%04x datalen %u retval %U (%u) flags %U", d->opcode,
+             d->datalen, format_iavf_aq_desc_retval, d->retval, d->retval,
+             format_iavf_aq_desc_flags, d->flags);
+
+  if (d->opcode == IIAVF_AQ_DESC_OP_SEND_TO_PF ||
+      d->opcode == IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF)
+    {
+      s =
+       format (s, "\n%Uv_opcode %U (%u) v_retval %U (%d) buf_dma_addr 0x%lx",
+               format_white_space, indent, format_virtchnl_op_name,
+               d->v_opcode, d->v_opcode, format_virtchnl_status, d->v_retval,
+               d->v_retval, (uword) d->param2 << 32 | d->param3);
+    }
+  else
+    {
+      s = format (
+       s, "\n%Ucookie_hi 0x%x cookie_lo 0x%x params %08x %08x %08x %08x",
+       format_white_space, indent, d->cookie_hi, d->cookie_lo, d->param0,
+       d->param1, d->param2, d->param3);
+    }
+  return s;
+}
+
+vnet_dev_rv_t
+iavf_aq_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  return vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_adminq_dma_mem_t), 0,
+                                (void **) &ad->aq_mem);
+}
+
+void
+iavf_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  vnet_dev_dma_mem_free (vm, dev, ad->aq_mem);
+}
+
+static void
+iavf_aq_arq_slot_init (vlib_main_t *vm, vnet_dev_t *dev, u16 slot)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  u64 pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq_bufs + slot);
+  ad->aq_mem->arq[slot] = (iavf_aq_desc_t){
+    .flags.buf = 1,
+    .flags.lb = IIAVF_AQ_BUF_SIZE > IIAVF_AQ_LARGE_BUF,
+    .datalen = sizeof (ad->aq_mem->arq_bufs[0].data),
+    .addr_hi = (u32) (pa >> 32),
+    .addr_lo = (u32) pa,
+  };
+}
+
+static void
+iavf_aq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_aq_desc_t *d;
+  u8 *b;
+
+  while (iavf_aq_arq_next_acq (vm, dev, &d, &b, 0))
+    {
+
+      log_debug (dev, "poll[%u] flags %x %U op %u v_op %u", ad->arq_next_slot,
+                d->flags.as_u16, format_iavf_aq_desc_flags, d->flags,
+                d->opcode, d->v_opcode);
+      if (d->datalen == sizeof (virtchnl_pf_event_t) && d->flags.buf)
+        vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+      else
+        log_err (dev, "malformed event message received (datalen %u)",
+                 d->datalen);
+
+      iavf_aq_arq_next_rel (vm, dev);
+    }
+
+  if (vec_len (ad->events))
+    {
+      virtchnl_pf_event_t *e;
+      char *virtchnl_event_names[] = {
+#define _(v, n) [v] = #n,
+       foreach_virtchnl_event_code
+#undef _
+      };
+
+      vec_foreach (e, ad->events)
+       {
+         log_debug (dev, "event %s (%u) sev %d",
+                    virtchnl_event_names[e->event], e->event, e->severity);
+
+         if (e->event == VIRTCHNL_EVENT_LINK_CHANGE)
+           {
+             vnet_dev_port_state_changes_t changes = {};
+             vnet_dev_port_t *port = vnet_dev_get_port_by_id (dev, 0);
+
+             if (port)
+               {
+                 iavf_port_t *ap = vnet_dev_get_port_data (port);
+                 int link_up;
+                 u32 speed = 0;
+
+                 if (ap->vf_cap_flags & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+                   {
+                     link_up = e->event_data.link_event_adv.link_status;
+                     speed = e->event_data.link_event_adv.link_speed;
+                   }
+                 else
+                   {
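+                     /* legacy link_speed is a one-hot bitmask; the set
+                      * bit's index selects a Mbps value in this table */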
+                     const u32 speed_table[8] = { 100,   1000,  10000, 40000,
+                                                  20000, 25000, 2500,  5000 };
+
+                     link_up = e->event_data.link_event.link_status;
+                     speed = e->event_data.link_event.link_speed;
+
+                     if (count_set_bits (speed) == 1 &&
+                         (speed & pow2_mask (8)))
+                       speed = speed_table[get_lowest_set_bit_index (speed)];
+                     else
+                       {
+                         if (link_up)
+                           log_warn (dev,
+                                     "unsupported link speed value "
+                                     "received (0x%x)",
+                                     speed);
+                         speed = 0;
+                       }
+                   }
+
+                 log_debug (dev, "LINK_CHANGE speed %u state %u", speed,
+                            link_up);
+
+                 if (port->link_up != link_up)
+                   {
+                     changes.change.link_state = 1;
+                     changes.link_state = link_up;
+                     log_debug (dev, "link state changed to %s",
+                                link_up ? "up" : "down");
+                   }
+
+                 if (port->speed != speed * 1000)
+                   {
+                     changes.change.link_speed = 1;
+                     changes.link_speed = speed * 1000;
+                     log_debug (dev, "link speed changed to %u Mbps", speed);
+                   }
+
+                 if (changes.change.any)
+                   vnet_dev_port_state_change (vm, port, changes);
+               }
+           }
+       }
+      vec_reset_length (ad->events);
+    }
+}
+
+static inline void
+iavf_irq_0_set_state (iavf_device_t *ad, int enable)
+{
+  u32 dyn_ctl0 = 0, icr0_ena = 0;
+
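+  /* first mask all interrupt causes and park the ITR index, then flush;
+   * when enabling, re-arm below with the admin queue cause and interval */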
+  dyn_ctl0 |= (3 << 3); /* 11b = No ITR update */
+
+  iavf_reg_write (ad, AVFINT_ICR0_ENA1, icr0_ena);
+  iavf_reg_write (ad, AVFINT_DYN_CTL0, dyn_ctl0);
+  iavf_reg_flush (ad);
+
+  if (!enable)
+    return;
+
+  dyn_ctl0 = 0;
+  icr0_ena = 0;
+
+  icr0_ena |= (1 << 30); /* [30] Admin Queue Enable */
+
+  dyn_ctl0 |= (1 << 0); /* [0] Interrupt Enable */
+  dyn_ctl0 |= (1 << 1); /* [1] Clear PBA */
+  dyn_ctl0 |= (2 << 3); /* [4:3] ITR Index, 10b = use ITR2 */
+  dyn_ctl0 |= ((IAVF_ITR_INT / 2) << 5); /* [16:5] ITR Interval in 2us steps */
+
+  iavf_reg_write (ad, AVFINT_ICR0_ENA1, icr0_ena);
+  iavf_reg_write (ad, AVFINT_DYN_CTL0, dyn_ctl0);
+  iavf_reg_flush (ad);
+}
+
+static void
+iavf_adminq_msix_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
+{
+  log_debug (dev, "MSI-X interrupt 0 received");
+  vnet_dev_process_call_op_no_wait (vm, dev, iavf_aq_poll);
+}
+
+static void
+iavf_adminq_intx_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_adminq_msix_handler (vm, dev, 0);
+}
+
+void
+iavf_aq_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  uword pa;
+  u32 len;
+
+  /* disable both tx and rx adminq queue */
+  iavf_reg_write (ad, IAVF_ATQLEN, 0);
+  iavf_reg_write (ad, IAVF_ARQLEN, 0);
+
+  len = IIAVF_AQ_ATQ_LEN;
+  pa = vnet_dev_get_dma_addr (vm, dev, &ad->aq_mem->atq);
+  iavf_reg_write (ad, IAVF_ATQT, 0);                   /* Tail */
+  iavf_reg_write (ad, IAVF_ATQH, 0);                   /* Head */
+  iavf_reg_write (ad, IAVF_ATQBAL, (u32) pa);          /* Base Address Low */
+  iavf_reg_write (ad, IAVF_ATQBAH, (u32) (pa >> 32));  /* Base Address High */
+  iavf_reg_write (ad, IAVF_ATQLEN, len | (1ULL << 31)); /* len & ena */
+
+  len = IIAVF_AQ_ARQ_LEN;
+  pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq);
+  iavf_reg_write (ad, IAVF_ARQT, 0);                   /* Tail */
+  iavf_reg_write (ad, IAVF_ARQH, 0);                   /* Head */
+  iavf_reg_write (ad, IAVF_ARQBAL, (u32) pa);          /* Base Address Low */
+  iavf_reg_write (ad, IAVF_ARQBAH, (u32) (pa >> 32));  /* Base Address High */
+  iavf_reg_write (ad, IAVF_ARQLEN, len | (1ULL << 31)); /* len & ena */
+
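+  /* pre-post a receive buffer in every ARQ slot and publish the tail so
+   * the PF can start delivering messages immediately */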
+  for (int i = 0; i < len; i++)
+    iavf_aq_arq_slot_init (vm, dev, i);
+  iavf_reg_write (ad, IAVF_ARQT, len - 1); /* Tail */
+
+  ad->atq_next_slot = 0;
+  ad->arq_next_slot = 0;
+  ad->adminq_active = 1;
+}
+
+void
+iavf_aq_poll_on (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+
+  vnet_dev_poll_dev_add (vm, dev, IIAVF_AQ_POLL_INTERVAL, iavf_aq_poll);
+
+  if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+    {
+      vnet_dev_pci_msix_add_handler (vm, dev, iavf_adminq_msix_handler, 0, 1);
+      vnet_dev_pci_msix_enable (vm, dev, 0, 1);
+    }
+  else
+    vnet_dev_pci_intx_add_handler (vm, dev, iavf_adminq_intx_handler);
+
+  iavf_irq_0_set_state (ad, 1);
+}
+
+void
+iavf_aq_poll_off (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+
+  iavf_irq_0_set_state (ad, 0);
+
+  vnet_dev_poll_dev_remove (vm, dev, iavf_aq_poll);
+
+  if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+    {
+      vnet_dev_pci_msix_disable (vm, dev, 0, 1);
+      vnet_dev_pci_msix_remove_handler (vm, dev, 0, 1);
+    }
+  else
+    vnet_dev_pci_intx_remove_handler (vm, dev);
+}
+
+vnet_dev_rv_t
+iavf_aq_atq_enq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t *desc,
+                const u8 *data, u16 len, f64 timeout)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_aq_desc_t *d = ad->aq_mem->atq + ad->atq_next_slot;
+  u8 *buf = ad->aq_mem->atq_bufs[ad->atq_next_slot].data;
+
+  ASSERT (len <= IIAVF_AQ_BUF_SIZE);
+
+  *d = *desc;
+
+  if (len)
+    {
+      u64 pa = vnet_dev_get_dma_addr (vm, dev, buf);
+      d->datalen = len;
+      d->addr_hi = (u32) (pa >> 32);
+      d->addr_lo = (u32) pa;
+      d->flags.buf = 1;
+      d->flags.rd = 1;
+      d->flags.lb = len > IIAVF_AQ_LARGE_BUF;
+      clib_memcpy_fast (buf, data, len);
+    }
+
+  log_debug (dev, "slot %u\n  %U", ad->atq_next_slot, format_iavf_aq_desc, d);
+
+  ad->atq_next_slot = (ad->atq_next_slot + 1) % IIAVF_AQ_ATQ_LEN;
+  iavf_reg_write (ad, IAVF_ATQT, ad->atq_next_slot);
+  iavf_reg_flush (ad);
+
+  if (timeout > 0)
+    {
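+      /* poll with exponential backoff; starting at timeout/62 and doubling
+       * each retry keeps total sleep near timeout (1 + 2 + ... + 32 = 63) */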
+      f64 suspend_time = timeout / 62;
+      f64 t0 = vlib_time_now (vm);
+      iavf_aq_desc_flags_t flags;
+
+      while (1)
+       {
+         flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+
+         if (flags.err)
+           {
+             log_err (dev, "adminq enqueue error [opcode 0x%x, retval %d]",
+                      d->opcode, d->retval);
+             return VNET_DEV_ERR_BUG;
+           }
+
+         if (flags.dd && flags.cmp)
+           return VNET_DEV_OK;
+
+         if (vlib_time_now (vm) - t0 > timeout)
+           {
+             log_err (dev, "adminq enqueue timeout [opcode 0x%x]", d->opcode);
+             return VNET_DEV_ERR_TIMEOUT;
+           }
+
+         vlib_process_suspend (vm, suspend_time);
+         suspend_time *= 2;
+       }
+    }
+
+  return VNET_DEV_OK;
+}
+
+void
+iavf_aq_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  if (ad->adminq_active)
+    {
+      iavf_aq_desc_t d = {
+       .opcode = IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN,
+       .driver_unloading = 1,
+       .flags = { .si = 1 },
+      };
+      log_debug (dev, "adminq queue shutdown");
+      iavf_aq_atq_enq (vm, dev, &d, 0, 0, 0);
+      ad->adminq_active = 0;
+    }
+}
+
+int
+iavf_aq_arq_next_acq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t **dp,
+                     u8 **bp, f64 timeout)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_aq_desc_t *d = ad->aq_mem->arq + ad->arq_next_slot;
+
+  if (timeout)
+    {
+      f64 suspend_time = timeout / 62;
+      f64 t0 = vlib_time_now (vm);
+
+      while (!iavf_aq_desc_is_done (d))
+       {
+         if (vlib_time_now (vm) - t0 > timeout)
+           return 0;
+
+         vlib_process_suspend (vm, suspend_time);
+
+         suspend_time *= 2;
+       }
+    }
+  else if (!iavf_aq_desc_is_done (d))
+    return 0;
+
+  log_debug (dev, "arq desc acquired in slot %u\n  %U", ad->arq_next_slot,
+            format_iavf_aq_desc, d);
+  *dp = d;
+  *bp = ad->aq_mem->arq_bufs[ad->arq_next_slot].data;
+  return 1;
+}
+
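+/* Release the slot returned by iavf_aq_arq_next_acq: re-arm it with a fresh
+ * buffer descriptor and hand it back to the PF by bumping the ARQ tail. */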
+void
+iavf_aq_arq_next_rel (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  ASSERT (iavf_aq_desc_is_done (ad->aq_mem->arq + ad->arq_next_slot));
+  iavf_aq_arq_slot_init (vm, dev, ad->arq_next_slot);
+  iavf_reg_write (ad, IAVF_ARQT, ad->arq_next_slot);
+  iavf_reg_flush (ad);
+  ad->arq_next_slot = (ad->arq_next_slot + 1) % IIAVF_AQ_ARQ_LEN;
+}
diff --git a/src/plugins/dev_iavf/counters.c b/src/plugins/dev_iavf/counters.c
new file mode 100644 (file)
index 0000000..6dcd011
--- /dev/null
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "counters",
+};
+
+typedef enum
+{
+  IIAVF_PORT_CTR_RX_BYTES,
+  IIAVF_PORT_CTR_TX_BYTES,
+  IIAVF_PORT_CTR_RX_PACKETS,
+  IIAVF_PORT_CTR_TX_PACKETS,
+  IIAVF_PORT_CTR_RX_DROPS,
+  IIAVF_PORT_CTR_TX_DROPS,
+  IIAVF_PORT_CTR_RX_UCAST,
+  IIAVF_PORT_CTR_TX_UCAST,
+  IIAVF_PORT_CTR_RX_MCAST,
+  IIAVF_PORT_CTR_TX_MCAST,
+  IIAVF_PORT_CTR_RX_BCAST,
+  IIAVF_PORT_CTR_TX_BCAST,
+  IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL,
+  IIAVF_PORT_CTR_TX_ERRORS,
+} iavf_port_counter_id_t;
+
+vnet_dev_counter_t iavf_port_counters[] = {
+  VNET_DEV_CTR_RX_BYTES (IIAVF_PORT_CTR_RX_BYTES),
+  VNET_DEV_CTR_RX_PACKETS (IIAVF_PORT_CTR_RX_PACKETS),
+  VNET_DEV_CTR_RX_DROPS (IIAVF_PORT_CTR_RX_DROPS),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL, RX, PACKETS,
+                      "unknown protocol"),
+
+  VNET_DEV_CTR_TX_BYTES (IIAVF_PORT_CTR_TX_BYTES),
+  VNET_DEV_CTR_TX_PACKETS (IIAVF_PORT_CTR_TX_PACKETS),
+  VNET_DEV_CTR_TX_DROPS (IIAVF_PORT_CTR_TX_DROPS),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"),
+  VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_ERRORS, TX, PACKETS, "errors"),
+};
+
+void
+iavf_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_add_counters (vm, port, iavf_port_counters,
+                             ARRAY_LEN (iavf_port_counters));
+}
+
+void
+iavf_port_poll_stats (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_rv_t rv;
+  vnet_dev_t *dev = port->dev;
+  virtchnl_eth_stats_t stats;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  virtchnl_queue_select_t qs = { .vsi_id = ap->vsi_id };
+
+  rv = iavf_vc_op_get_stats (vm, dev, &qs, &stats);
+
+  if (rv != VNET_DEV_OK)
+    return;
+
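+  /* each registered counter carries its iavf_port_counter_id_t in
+   * c->user_data; map it to the matching virtchnl_eth_stats_t field */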
+  foreach_vnet_dev_counter (c, port->counter_main)
+    {
+      switch (c->user_data)
+       {
+       case IIAVF_PORT_CTR_RX_BYTES:
+         vnet_dev_counter_value_update (vm, c, stats.rx_bytes);
+         break;
+       case IIAVF_PORT_CTR_TX_BYTES:
+         vnet_dev_counter_value_update (vm, c, stats.tx_bytes);
+         break;
+       case IIAVF_PORT_CTR_RX_PACKETS:
+         vnet_dev_counter_value_update (
+           vm, c, stats.rx_unicast + stats.rx_broadcast + stats.rx_multicast);
+         break;
+       case IIAVF_PORT_CTR_TX_PACKETS:
+         vnet_dev_counter_value_update (
+           vm, c, stats.tx_unicast + stats.tx_broadcast + stats.tx_multicast);
+         break;
+       case IIAVF_PORT_CTR_RX_DROPS:
+         vnet_dev_counter_value_update (vm, c, stats.rx_discards);
+         break;
+       case IIAVF_PORT_CTR_TX_DROPS:
+         vnet_dev_counter_value_update (vm, c, stats.tx_discards);
+         break;
+       case IIAVF_PORT_CTR_RX_UCAST:
+         vnet_dev_counter_value_update (vm, c, stats.rx_unicast);
+         break;
+       case IIAVF_PORT_CTR_TX_UCAST:
+         vnet_dev_counter_value_update (vm, c, stats.tx_unicast);
+         break;
+       case IIAVF_PORT_CTR_RX_MCAST:
+         vnet_dev_counter_value_update (vm, c, stats.rx_multicast);
+         break;
+       case IIAVF_PORT_CTR_TX_MCAST:
+         vnet_dev_counter_value_update (vm, c, stats.tx_multicast);
+         break;
+       case IIAVF_PORT_CTR_RX_BCAST:
+         vnet_dev_counter_value_update (vm, c, stats.rx_broadcast);
+         break;
+       case IIAVF_PORT_CTR_TX_BCAST:
+         vnet_dev_counter_value_update (vm, c, stats.tx_broadcast);
+         break;
+       case IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL:
+         vnet_dev_counter_value_update (vm, c, stats.rx_unknown_protocol);
+         break;
+       case IIAVF_PORT_CTR_TX_ERRORS:
+         vnet_dev_counter_value_update (vm, c, stats.tx_errors);
+         break;
+       default:
+         ASSERT (0);
+       }
+    }
+}
diff --git a/src/plugins/dev_iavf/format.c b/src/plugins/dev_iavf/format.c
new file mode 100644 (file)
index 0000000..bc84cc3
--- /dev/null
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+
+u8 *
+format_iavf_vf_cap_flags (u8 *s, va_list *args)
+{
+  u32 flags = va_arg (*args, u32);
+  int not_first = 0;
+
+  char *strs[32] = {
+#define _(a, b, c) [a] = c,
+    foreach_iavf_vf_cap_flag
+#undef _
+  };
+
+  for (int i = 0; i < 32; i++)
+    {
+      if ((flags & (1 << i)) == 0)
+       continue;
+      if (not_first)
+       s = format (s, " ");
+      if (strs[i])
+       s = format (s, "%s", strs[i]);
+      else
+       s = format (s, "unknown(%u)", i);
+      not_first = 1;
+    }
+  return s;
+}
+
+u8 *
+format_iavf_rx_desc_qw1 (u8 *s, va_list *args)
+{
+  iavf_rx_desc_qw1_t *qw1 = va_arg (*args, iavf_rx_desc_qw1_t *);
+  s = format (s, "len %u ptype %u ubmcast %u fltstat %u flags", qw1->length,
+             qw1->ptype, qw1->ubmcast, qw1->fltstat);
+
+#define _(f)                                                                  \
+  if (qw1->f)                                                                 \
+  s = format (s, " " #f)
+
+  _ (dd);
+  _ (eop);
+  _ (l2tag1p);
+  _ (l3l4p);
+  _ (crcp);
+  _ (flm);
+  _ (lpbk);
+  _ (ipv6exadd);
+  _ (int_udp_0);
+  _ (ipe);
+  _ (l4e);
+  _ (oversize);
+#undef _
+  return s;
+}
+
+u8 *
+format_iavf_rx_trace (u8 *s, va_list *args)
+{
+  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t *node = va_arg (*args, vlib_node_t *);
+  iavf_rx_trace_t *t = va_arg (*args, iavf_rx_trace_t *);
+  iavf_rx_desc_qw1_t *qw1;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+  u32 indent = format_get_indent (s);
+  int i = 0;
+
+  s = format (s, "avf: %v (%d) qid %u next-node %U flow-id %u", hi->name,
+             t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+             node->index, t->next_index, t->flow_id);
+
+  qw1 = (iavf_rx_desc_qw1_t *) t->qw1s;
+
+  do
+    s = format (s, "\n%Udesc %u: %U", format_white_space, indent + 2, i,
+               format_iavf_rx_desc_qw1, qw1 + i);
+  while ((qw1[i++].eop) == 0 && i < IAVF_RX_MAX_DESC_IN_CHAIN);
+
+  return s;
+}
+
+u8 *
+format_iavf_port_status (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "caps: %U", format_iavf_vf_cap_flags, ap->vf_cap_flags);
+  s = format (s, "\n%Uvsi is %u, RSS key size is %u, RSS lut size is %u",
+             format_white_space, indent, ap->vsi_id, ap->rss_key_size,
+             ap->rss_lut_size);
+  s = format (s, "\n%Uflow offload ", format_white_space, indent);
+  if (ap->flow_offload)
+    s = format (s, "enabled, %u flows configured",
+               vec_len (ap->flow_lookup_entries));
+  else
+    s = format (s, "disabled");
+  return s;
+}
+
+u8 *
+format_iavf_log (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  char *func = va_arg (*args, char *);
+
+  if (dev)
+    s = format (s, "%U", format_vnet_dev_addr, dev);
+  if (dev && func)
+    vec_add1 (s, ' ');
+  if (func)
+    {
+      if (strncmp (func, "iavf_", 5) == 0)
+       func += 5;
+      s = format (s, "%s", func);
+    }
+  vec_add1 (s, ':');
+  vec_add1 (s, ' ');
+  return s;
+}
diff --git a/src/plugins/dev_iavf/iavf.c b/src/plugins/dev_iavf/iavf.c
new file mode 100644 (file)
index 0000000..a8b108c
--- /dev/null
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "init",
+};
+
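+/* Capability flags offered to the PF in VIRTCHNL_OP_GET_VF_RESOURCES; the PF
+ * replies with the subset it actually supports. */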
+static const u32 driver_cap_flags =
+  /**/ VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+  /**/ VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+  /**/ VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF |
+  /**/ VIRTCHNL_VF_OFFLOAD_FDIR_PF |
+  /**/ VIRTCHNL_VF_OFFLOAD_L2 |
+  /**/ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+  /**/ VIRTCHNL_VF_OFFLOAD_RSS_PF |
+  /**/ VIRTCHNL_VF_OFFLOAD_RX_POLLING |
+  /**/ VIRTCHNL_VF_OFFLOAD_VLAN |
+  /**/ VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+  /**/ VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+  /**/ 0;
+
+static const virtchnl_version_info_t driver_virtchnl_version = {
+  .major = VIRTCHNL_VERSION_MAJOR,
+  .minor = VIRTCHNL_VERSION_MINOR,
+};
+
+#define _(f, n, s, d)                                                         \
+  { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t iavf_rx_node_counters[] = { foreach_iavf_rx_node_counter };
+vlib_error_desc_t iavf_tx_node_counters[] = { foreach_iavf_tx_node_counter };
+#undef _
+
+vnet_dev_node_t iavf_rx_node = {
+  .error_counters = iavf_rx_node_counters,
+  .n_error_counters = ARRAY_LEN (iavf_rx_node_counters),
+  .format_trace = format_iavf_rx_trace,
+};
+
+vnet_dev_node_t iavf_tx_node = {
+  .error_counters = iavf_tx_node_counters,
+  .n_error_counters = ARRAY_LEN (iavf_tx_node_counters),
+};
+
+static struct
+{
+  u16 device_id;
+  char *desc;
+} iavf_dev_types[] = {
+  { 0x1889, "Intel(R) Adaptive Virtual Function" },
+  { 0x154c, "Intel(R) X710 Virtual Function" },
+  { 0x37cd, "Intel(R) X722 Virtual Function" },
+};
+
+static u8 *
+iavf_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+  vnet_dev_bus_pci_device_info_t *di = dev_info;
+
+  if (di->vendor_id != 0x8086)
+    return 0;
+
+  FOREACH_ARRAY_ELT (dt, iavf_dev_types)
+    {
+      if (dt->device_id == di->device_id)
+       return format (0, "%s", dt->desc);
+    }
+
+  return 0;
+}
+
+static vnet_dev_rv_t
+iavf_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  u32 n_tries = 50;
+
+  iavf_aq_init (vm, dev);
+  iavf_vc_op_reset_vf (vm, dev);
+
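+  /* wait for VFGEN_RSTAT[1:0] to reach 2 (VIRTCHNL_VFR_VFACTIVE), i.e. the
+   * PF has finished resetting this VF */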
+  do
+    {
+      if (n_tries-- == 0)
+       return VNET_DEV_ERR_TIMEOUT;
+      vlib_process_suspend (vm, 0.02);
+    }
+  while ((iavf_reg_read (ad, VFGEN_RSTAT) & 3) != 2);
+
+  iavf_aq_init (vm, dev);
+  iavf_aq_poll_on (vm, dev);
+  return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+iavf_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  log_debug (dev, "alloc");
+  return iavf_aq_alloc (vm, dev);
+}
+
+static vnet_dev_rv_t
+iavf_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  virtchnl_version_info_t ver;
+  virtchnl_vf_resource_t res;
+  vnet_dev_rv_t rv;
+
+  log_debug (dev, "init");
+
+  if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ad->bar0)))
+    return rv;
+
+  if ((rv = vnet_dev_pci_bus_master_enable (vm, dev)))
+    return rv;
+
+  if ((rv = iavf_reset (vm, dev)))
+    return rv;
+
+  if ((rv = iavf_vc_op_version (vm, dev, &driver_virtchnl_version, &ver)))
+    return rv;
+
+  if (ver.major != driver_virtchnl_version.major ||
+      ver.minor != driver_virtchnl_version.minor)
+    return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+
+  if ((rv = iavf_vc_op_get_vf_resources (vm, dev, &driver_cap_flags, &res)))
+    return rv;
+
+  if (res.num_vsis != 1 || res.vsi_res[0].vsi_type != VIRTCHNL_VSI_SRIOV)
+    return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+  iavf_port_t iavf_port = {
+    .vf_cap_flags = res.vf_cap_flags,
+    .rss_key_size = res.rss_key_size,
+    .rss_lut_size = res.rss_lut_size,
+    .max_vectors = res.max_vectors,
+    .vsi_id = res.vsi_res[0].vsi_id,
+    .num_qp = res.vsi_res[0].num_queue_pairs,
+  };
+
+  vnet_dev_port_add_args_t port_add_args = {
+    .port = {
+      .attr = {
+        .type = VNET_DEV_PORT_TYPE_ETHERNET,
+        .max_rx_queues = res.num_queue_pairs,
+        .max_tx_queues = res.num_queue_pairs,
+        .max_supported_frame_size = res.max_mtu,
+      },
+      .ops = {
+        .init = iavf_port_init,
+        .start = iavf_port_start,
+        .stop = iavf_port_stop,
+       .config_change = iavf_port_cfg_change,
+        .format_status = format_iavf_port_status,
+      },
+      .data_size = sizeof (iavf_port_t),
+      .initial_data = &iavf_port,
+    },
+    .rx_node = &iavf_rx_node,
+    .tx_node = &iavf_tx_node,
+    .rx_queue = {
+      .config = {
+        .data_size = sizeof (iavf_rxq_t),
+        .default_size = 512,
+        .multiplier = 32,
+        .min_size = 32,
+        .max_size = 4096,
+       .size_is_power_of_two = 1,
+      },
+      .ops = {
+        .alloc = iavf_rx_queue_alloc,
+        .free = iavf_rx_queue_free,
+      },
+    },
+    .tx_queue = {
+      .config = {
+        .data_size = sizeof (iavf_txq_t),
+        .default_size = 512,
+        .multiplier = 32,
+        .min_size = 32,
+        .max_size = 4096,
+       .size_is_power_of_two = 1,
+      },
+      .ops = {
+        .alloc = iavf_tx_queue_alloc,
+        .free = iavf_tx_queue_free,
+      },
+    },
+  };
+
+  vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr,
+                               res.vsi_res[0].default_mac_addr);
+
+  log_info (dev, "MAC address is %U", format_ethernet_address,
+           res.vsi_res[0].default_mac_addr);
+
+  if (vlib_get_n_threads () <= vnet_dev_get_pci_n_msix_interrupts (dev) - 1)
+    port_add_args.port.attr.caps.interrupt_mode = 1;
+  else
+    log_notice (dev,
+               "number of threads (%u) bigger than number of interrupt lines "
+               "(%u), interrupt mode disabled",
+               vlib_get_n_threads (), res.max_vectors);
+
+  if (res.vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)
+    {
+      if (res.rss_key_size < IAVF_MAX_RSS_KEY_SIZE)
+       {
+         log_notice (
+           dev, "unsupported RSS config provided by device, RSS disabled");
+       }
+      else
+       {
+         port_add_args.port.attr.caps.rss = 1;
+         if (res.rss_lut_size > IAVF_MAX_RSS_LUT_SIZE)
+           log_notice (dev, "device supports bigger RSS LUT than driver");
+       }
+    }
+
+  return vnet_dev_port_add (vm, dev, 0, &port_add_args);
+}
+
+static void
+iavf_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  log_debug (dev, "deinit");
+  iavf_aq_poll_off (vm, dev);
+  iavf_aq_deinit (vm, dev);
+  iavf_aq_free (vm, dev);
+}
+
+static void
+iavf_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  log_debug (dev, "free");
+  iavf_aq_free (vm, dev);
+}
+
+VNET_DEV_REGISTER_DRIVER (avf) = {
+  .name = "iavf",
+  .bus = "pci",
+  .device_data_sz = sizeof (iavf_device_t),
+  .runtime_temp_space_sz = sizeof (iavf_rt_data_t),
+  .ops = {
+    .alloc = iavf_alloc,
+    .init = iavf_init,
+    .deinit = iavf_deinit,
+    .free = iavf_free,
+    .probe = iavf_probe,
+  },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "dev_iavf",
+};
diff --git a/src/plugins/dev_iavf/iavf.h b/src/plugins/dev_iavf/iavf.h
new file mode 100644 (file)
index 0000000..7576fa9
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_H_
+#define _IIAVF_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf_desc.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_ITR_INT             250
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+#define IAVF_MAX_RSS_KEY_SIZE    52
+#define IAVF_MAX_RSS_LUT_SIZE    64
+#define IIAVF_AQ_POLL_INTERVAL   0.2
+#define IIAVF_AQ_BUF_SIZE        4096
+
+typedef struct iavf_adminq_dma_mem iavf_adminq_dma_mem_t;
+
+typedef struct
+{
+  u8 adminq_active : 1;
+  void *bar0;
+
+  /* Admin queues */
+  iavf_adminq_dma_mem_t *aq_mem;
+  u16 atq_next_slot;
+  u16 arq_next_slot;
+  virtchnl_pf_event_t *events;
+
+} iavf_device_t;
+
+typedef struct
+{
+  u32 flow_id;
+  u16 next_index;
+  i16 buffer_advance;
+} iavf_flow_lookup_entry_t;
+
+typedef struct
+{
+  u8 admin_up : 1;
+  u8 flow_offload : 1;
+  iavf_flow_lookup_entry_t *flow_lookup_entries;
+  u32 vf_cap_flags;
+  u16 vsi_id;
+  u16 rss_key_size;
+  u16 rss_lut_size;
+  u16 num_qp;
+  u16 max_vectors;
+} iavf_port_t;
+
+typedef struct
+{
+  u32 *qtx_tail;
+  u32 *buffer_indices;
+  iavf_tx_desc_t *descs;
+  u16 next;
+  u16 n_enqueued;
+  u16 *rs_slots;
+  iavf_tx_desc_t *tmp_descs;
+  u32 *tmp_bufs;
+  u32 *ph_bufs;
+} iavf_txq_t;
+
+typedef struct
+{
+  u32 *qrx_tail;
+  u32 *buffer_indices;
+  iavf_rx_desc_t *descs;
+  u16 next;
+  u16 n_enqueued;
+} iavf_rxq_t;
+
+typedef struct
+{
+  u16 qid;
+  u16 next_index;
+  u32 hw_if_index;
+  u32 flow_id;
+  u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN];
+} iavf_rx_trace_t;
+
+/* adminq.c */
+vnet_dev_rv_t iavf_aq_alloc (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_init (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_on (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_off (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_deinit (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t iavf_aq_atq_enq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t *,
+                              const u8 *, u16, f64);
+int iavf_aq_arq_next_acq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t **,
+                         u8 **, f64);
+void iavf_aq_arq_next_rel (vlib_main_t *, vnet_dev_t *);
+format_function_t format_virtchnl_op_name;
+format_function_t format_virtchnl_status;
+
+/* format.c */
+format_function_t format_iavf_vf_cap_flags;
+format_function_t format_iavf_rx_trace;
+format_function_t format_iavf_port_status;
+format_function_t format_iavf_log;
+
+/* port.c */
+vnet_dev_rv_t iavf_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_start (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+                                   vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t iavf_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t iavf_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* counters.c */
+void iavf_port_poll_stats (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_add_counters (vlib_main_t *, vnet_dev_port_t *);
+
+/* inline funcs */
+
+static inline u32
+iavf_get_u32 (void *start, int offset)
+{
+  return *(u32 *) (((u8 *) start) + offset);
+}
+
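+/* MMIO accessors: stores use release ordering so descriptor memory writes
+ * become visible before the tail/doorbell update; loads are relaxed, and
+ * iavf_reg_flush () reads VFGEN_RSTAT back to force out posted writes. */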
+static inline void
+iavf_reg_write (iavf_device_t *ad, u32 addr, u32 val)
+{
+  __atomic_store_n ((u32 *) ((u8 *) ad->bar0 + addr), val, __ATOMIC_RELEASE);
+}
+
+static inline u32
+iavf_reg_read (iavf_device_t *ad, u32 addr)
+{
+  return __atomic_load_n ((u32 *) ((u8 *) ad->bar0 + addr),
+                          __ATOMIC_RELAXED);
+}
+
+static inline void
+iavf_reg_flush (iavf_device_t *ad)
+{
+  iavf_reg_read (ad, VFGEN_RSTAT);
+  asm volatile("" ::: "memory");
+}
+
+#define log_debug(dev, f, ...)                                                \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, iavf_log.class, "%U" f, format_iavf_log,    \
+           (dev), __func__, ##__VA_ARGS__)
+#define log_info(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_INFO, iavf_log.class, "%U: " f,                    \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...)                                               \
+  vlib_log (VLIB_LOG_LEVEL_NOTICE, iavf_log.class, "%U: " f,                  \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_WARNING, iavf_log.class, "%U: " f,                 \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_ERR, iavf_log.class, "%U: " f,                     \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+/* temp */
+#define IAVF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+
+typedef struct
+{
+  u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+  u32 buffers[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+} iavf_rx_tail_t;
+
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  vlib_buffer_t *bufs[IAVF_RX_VECTOR_SZ];
+  u16 next[IAVF_RX_VECTOR_SZ];
+  u64 qw1s[IAVF_RX_VECTOR_SZ];
+  u32 flow_ids[IAVF_RX_VECTOR_SZ];
+  iavf_rx_tail_t tails[IAVF_RX_VECTOR_SZ];
+} iavf_rt_data_t;
+
+#define foreach_iavf_tx_node_counter                                          \
+  _ (SEG_SZ_EXCEEDED, seg_sz_exceeded, ERROR, "segment size exceeded")        \
+  _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_TX_NODE_CTR_##f,
+  foreach_iavf_tx_node_counter
+#undef _
+} iavf_tx_node_counter_t;
+
+#define foreach_iavf_rx_node_counter                                          \
+  _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_RX_NODE_CTR_##f,
+  foreach_iavf_rx_node_counter
+#undef _
+} iavf_rx_node_counter_t;
+
+#endif /* _IIAVF_H_ */
diff --git a/src/plugins/dev_iavf/iavf_desc.h b/src/plugins/dev_iavf/iavf_desc.h
new file mode 100644 (file)
index 0000000..053013e
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_DESC_H_
+#define _IIAVF_DESC_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+
+#define IAVF_TXD_CMD(x)                       (1 << (x + 4))
+#define IAVF_TXD_CMD_EXT(x, val)       ((u64) val << (x + 4))
+#define IAVF_TXD_CMD_EOP              IAVF_TXD_CMD (0)
+#define IAVF_TXD_CMD_RS                       IAVF_TXD_CMD (1)
+#define IAVF_TXD_CMD_RSV              IAVF_TXD_CMD (2)
+#define IAVF_TXD_CMD_IIPT_NONE        IAVF_TXD_CMD_EXT (5, 0)
+#define IAVF_TXD_CMD_IIPT_IPV6        IAVF_TXD_CMD_EXT (5, 1)
+#define IAVF_TXD_CMD_IIPT_IPV4_NO_CSUM IAVF_TXD_CMD_EXT (5, 2)
+#define IAVF_TXD_CMD_IIPT_IPV4        IAVF_TXD_CMD_EXT (5, 3)
+#define IAVF_TXD_CMD_L4T_UNKNOWN       IAVF_TXD_CMD_EXT (8, 0)
+#define IAVF_TXD_CMD_L4T_TCP          IAVF_TXD_CMD_EXT (8, 1)
+#define IAVF_TXD_CMD_L4T_SCTP         IAVF_TXD_CMD_EXT (8, 2)
+#define IAVF_TXD_CMD_L4T_UDP          IAVF_TXD_CMD_EXT (8, 3)
+#define IAVF_TXD_OFFSET(x, factor, val)                                       \
+  (((u64) val / (u64) factor) << (16 + x))
+#define IAVF_TXD_OFFSET_MACLEN(val) IAVF_TXD_OFFSET (0, 2, val)
+#define IAVF_TXD_OFFSET_IPLEN(val)  IAVF_TXD_OFFSET (7, 4, val)
+#define IAVF_TXD_OFFSET_L4LEN(val)  IAVF_TXD_OFFSET (14, 4, val)
+#define IAVF_TXD_DTYP_CTX          0x1ULL
+#define IAVF_TXD_CTX_CMD_TSO       IAVF_TXD_CMD (0)
+#define IAVF_TXD_CTX_SEG(val, x)    (((u64) val) << (30 + x))
+#define IAVF_TXD_CTX_SEG_TLEN(val)  IAVF_TXD_CTX_SEG (val, 0)
+#define IAVF_TXD_CTX_SEG_MSS(val)   IAVF_TXD_CTX_SEG (val, 20)
+
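+/* For illustration (header sizes are the usual Ethernet/IPv4/TCP values, not
+ * part of this change): a checksum-offloaded TCP packet would set
+ * IAVF_TXD_CMD_EOP | IAVF_TXD_CMD_RS | IAVF_TXD_CMD_IIPT_IPV4 |
+ * IAVF_TXD_CMD_L4T_TCP | IAVF_TXD_OFFSET_MACLEN (14) |
+ * IAVF_TXD_OFFSET_IPLEN (20) | IAVF_TXD_OFFSET_L4LEN (20); the OFFSET macros
+ * encode lengths in 2-byte (MACLEN) or 4-byte (IPLEN, L4LEN) units. */
+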
+typedef union
+{
+  struct
+  {
+    u32 mirr : 13;
+    u32 _reserved1 : 3;
+    u32 l2tag1 : 16;
+    u32 filter_status;
+  };
+  u64 as_u64;
+} iavf_rx_desc_qw0_t;
+
+typedef union
+{
+  struct
+  {
+    /* status */
+    u64 dd : 1;
+    u64 eop : 1;
+    u64 l2tag1p : 1;
+    u64 l3l4p : 1;
+    u64 crcp : 1;
+    u64 _reserved2 : 4;
+    u64 ubmcast : 2;
+    u64 flm : 1;
+    u64 fltstat : 2;
+    u64 lpbk : 1;
+    u64 ipv6exadd : 1;
+    u64 _reserved3 : 2;
+    u64 int_udp_0 : 1;
+
+    /* error */
+    u64 _reserved_err0 : 3;
+    u64 ipe : 1;
+    u64 l4e : 1;
+    u64 _reserved_err5 : 1;
+    u64 oversize : 1;
+    u64 _reserved_err7 : 1;
+
+    u64 rsv2 : 3;
+    u64 ptype : 8;
+    u64 length : 26;
+  };
+  u64 as_u64;
+} iavf_rx_desc_qw1_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw0_t, 8);
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw1_t, 8);
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      iavf_rx_desc_qw0_t qw0;
+      iavf_rx_desc_qw1_t qw1;
+      u64 rsv3 : 64;
+      u32 flex_lo;
+      u32 fdid_flex_hi;
+    };
+    u64 qword[4];
+    u64 addr;
+#ifdef CLIB_HAVE_VEC256
+    u64x4 as_u64x4;
+#endif
+  };
+} iavf_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_t, 32);
+
+typedef struct
+{
+  union
+  {
+    u64 qword[2];
+#ifdef CLIB_HAVE_VEC128
+    u64x2 as_u64x2;
+#endif
+  };
+} iavf_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_tx_desc_t, 16);
+
+#endif /* _IIAVF_DESC_H_ */
diff --git a/src/plugins/dev_iavf/iavf_regs.h b/src/plugins/dev_iavf/iavf_regs.h
new file mode 100644 (file)
index 0000000..3c270d7
--- /dev/null
@@ -0,0 +1,351 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_REGS_H_
+#define _IIAVF_REGS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+
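+/* Each iavf_reg_*_t_fields list describes one 32-bit register as LSB-first
+ * (bit-width, field-name) pairs, intended for x-macro expansion (via the
+ * __ macro) into bitfield types and formatters. */
+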
+#define iavf_reg_ctrl_t_fields                                                \
+  __ (1, full_duplex)                                                         \
+  __ (1, _reserved1)                                                          \
+  __ (1, gio_master_disable)                                                  \
+  __ (3, _reserved3)                                                          \
+  __ (1, set_link_up)                                                         \
+  __ (9, _reserved7)                                                          \
+  __ (1, sdp0_gpien)                                                          \
+  __ (1, sdp1_gpien)                                                          \
+  __ (1, sdp0_data)                                                           \
+  __ (1, sdp1_data)                                                           \
+  __ (1, advd3wuc)                                                            \
+  __ (1, sdp0_wde)                                                            \
+  __ (1, sdp0_iodir)                                                          \
+  __ (1, sdp1_iodir)                                                          \
+  __ (2, _reserved24)                                                         \
+  __ (1, port_sw_reset)                                                       \
+  __ (1, rx_flow_ctl_en)                                                      \
+  __ (1, tx_flow_ctl_en)                                                      \
+  __ (1, device_reset)                                                        \
+  __ (1, vlan_mode_enable)                                                    \
+  __ (1, phy_reset)
+
+#define iavf_reg_status_t_fields                                              \
+  __ (1, full_duplex)                                                         \
+  __ (1, link_up)                                                             \
+  __ (2, _reserved2)                                                          \
+  __ (1, tx_off)                                                              \
+  __ (1, _reserved5)                                                          \
+  __ (2, speed)                                                               \
+  __ (2, asdv)                                                                \
+  __ (1, phy_reset_asserted)                                                  \
+  __ (8, _reserved11)                                                         \
+  __ (1, gio_master_en_sts)                                                   \
+  __ (1, dev_rst_set)                                                         \
+  __ (1, rst_done)                                                            \
+  __ (1, speed_2p5)                                                           \
+  __ (7, _reserved23)                                                         \
+  __ (1, lpi_ignore)                                                          \
+  __ (1, _reserved31)
+
+#define iavf_reg_ctrl_ext_t_fields                                            \
+  __ (2, _reserved0)                                                          \
+  __ (1, sdp2_gpien)                                                          \
+  __ (1, sdp3_gpien)                                                          \
+  __ (2, _reserved4)                                                          \
+  __ (1, sdp2_data)                                                           \
+  __ (1, sdp3_data)                                                           \
+  __ (2, _reserved8)                                                          \
+  __ (1, sdp2_iodir)                                                          \
+  __ (1, sdp3_iodir)                                                          \
+  __ (1, _reserved12)                                                         \
+  __ (1, eeprom_block_rst)                                                    \
+  __ (2, _reserved14)                                                         \
+  __ (1, no_snoop_dis)                                                        \
+  __ (1, relaxed_ordering_dis)                                                \
+  __ (2, _reserved18)                                                         \
+  __ (1, phy_power_down_ena)                                                  \
+  __ (5, _reserved21)                                                         \
+  __ (1, ext_vlan_ena)                                                        \
+  __ (1, _reserved27)                                                         \
+  __ (1, driver_loaded)                                                       \
+  __ (3, _reserved29)
+
+#define iavf_reg_mdic_t_fields                                                \
+  __ (16, data)                                                               \
+  __ (5, regadd)                                                              \
+  __ (5, _reserved21)                                                         \
+  __ (2, opcode)                                                              \
+  __ (1, ready)                                                               \
+  __ (1, mid_ie)                                                              \
+  __ (1, mid_err)                                                             \
+  __ (1, _reserved31)
+
+#define iavf_reg_rctl_t_fields                                                \
+  __ (1, _reserved0)                                                          \
+  __ (1, rx_enable)                                                           \
+  __ (1, store_bad_packets)                                                   \
+  __ (1, uc_promisc_ena)                                                      \
+  __ (1, mc_promisc_ena)                                                      \
+  __ (1, long_pkt_reception_ena)                                              \
+  __ (2, loopback_mode)                                                       \
+  __ (2, hash_select)                                                         \
+  __ (2, _reserved10)                                                         \
+  __ (2, mc_uc_tbl_off)                                                       \
+  __ (1, _reserved14)                                                         \
+  __ (1, bcast_accept_mode)                                                   \
+  __ (2, rx_buf_sz)                                                           \
+  __ (1, vlan_filter_ena)                                                     \
+  __ (1, canonical_form_ind_ena)                                              \
+  __ (1, canonical_form_ind_bit_val)                                          \
+  __ (1, pad_small_rx_pkts)                                                   \
+  __ (1, discard_pause_frames)                                                \
+  __ (1, pass_mac_ctrl_frames)                                                \
+  __ (2, _reserved24)                                                         \
+  __ (1, strip_eth_crc)                                                       \
+  __ (5, _reserved26)
+
+#define iavf_reg_tctl_t_fields                                                \
+  __ (1, _reserved0)                                                          \
+  __ (1, tx_enable)                                                           \
+  __ (1, _reserved2)                                                          \
+  __ (1, pad_short_pkts)                                                      \
+  __ (8, collision_threshold)                                                 \
+  __ (10, backoff_slot_time)                                                  \
+  __ (1, sw_xoff_tx)                                                          \
+  __ (1, _reserved23)                                                         \
+  __ (1, retransmit_on_late_collision)                                        \
+  __ (7, _reserved25)
+
+#define iavf_reg_phpm_t_fields                                                \
+  __ (1, _reserved0)                                                          \
+  __ (1, restart_autoneg)                                                     \
+  __ (1, _reserved2)                                                          \
+  __ (1, dis_1000_in_non_d0a)                                                 \
+  __ (1, link_energy_detect)                                                  \
+  __ (1, go_link_disc)                                                        \
+  __ (1, disable_1000)                                                        \
+  __ (1, spd_b2b_en)                                                          \
+  __ (1, rst_compl)                                                           \
+  __ (1, dis_100_in_non_d0a)                                                  \
+  __ (1, ulp_req)                                                             \
+  __ (1, disable_2500)                                                        \
+  __ (1, dis_2500_in_non_d0a)                                                 \
+  __ (1, ulp_trig)                                                            \
+  __ (2, ulp_delay)                                                           \
+  __ (1, link_energy_en)                                                      \
+  __ (1, dev_off_en)                                                          \
+  __ (1, dev_off_state)                                                       \
+  __ (1, ulp_en)                                                              \
+  __ (12, _reserved20)
+
+#define iavf_reg_manc_t_fields                                                \
+  __ (1, flow_ctrl_discard)                                                   \
+  __ (1, ncsi_discard)                                                        \
+  __ (12, _reserved2)                                                         \
+  __ (1, fw_reset)                                                            \
+  __ (1, tco_isolate)                                                         \
+  __ (1, tco_reset)                                                           \
+  __ (1, rcv_tco_en)                                                          \
+  __ (1, keep_phy_link_up)                                                    \
+  __ (1, rcv_all)                                                             \
+  __ (1, inhibit_ulp)                                                         \
+  __ (2, _reserved21)                                                         \
+  __ (1, en_xsum_filter)                                                      \
+  __ (1, en_ipv4_filter)                                                      \
+  __ (1, fixed_net_type)                                                      \
+  __ (1, net_type)                                                            \
+  __ (1, ipv6_adv_only)                                                       \
+  __ (1, en_bmc2os)                                                           \
+  __ (1, en_bmc2net)                                                          \
+  __ (1, mproxye)                                                             \
+  __ (1, mproxya)
+
+#define iavf_reg_swsm_t_fields                                                \
+  __ (1, smbi)                                                                \
+  __ (1, swesmbi)                                                             \
+  __ (30, _reserved2)
+
+#define iavf_reg_fwsm_t_fields                                                \
+  __ (1, eep_fw_semaphore)                                                    \
+  __ (3, fw_mode)                                                             \
+  __ (2, _reserved4)                                                          \
+  __ (1, eep_reload_ind)                                                      \
+  __ (8, _reserved7)                                                          \
+  __ (1, fw_val_bit)                                                          \
+  __ (3, reset_ctr)                                                           \
+  __ (6, ext_err_ind)                                                         \
+  __ (1, pcie_config_err_ind)                                                 \
+  __ (5, _reserved26)                                                         \
+  __ (1, factory_mac_addr_restored)
+
+#define iavf_reg_sw_fw_sync_t_fields                                          \
+  __ (1, sw_flash_sm)                                                         \
+  __ (1, sw_phy_sm)                                                           \
+  __ (1, sw_i2c_sm)                                                           \
+  __ (1, sw_mac_csr_sm)                                                       \
+  __ (3, _reserved4)                                                          \
+  __ (1, sw_svr_sm)                                                           \
+  __ (1, sw_mb_sm)                                                            \
+  __ (1, _reserved9)                                                          \
+  __ (1, sw_mng_sm)                                                           \
+  __ (5, _reserved11)                                                         \
+  __ (1, fw_flash_sm)                                                         \
+  __ (1, fw_phy_sm)                                                           \
+  __ (1, fw_i2c_sm)                                                           \
+  __ (1, fw_mac_csr_sm)                                                       \
+  __ (3, _reserved20)                                                         \
+  __ (1, fw_svr_sm)                                                           \
+  __ (8, _reserved24)
+
+#define iavf_reg_srrctl_t_fields                                              \
+  __ (7, bsizepacket)                                                         \
+  __ (1, _reserved7)                                                          \
+  __ (6, bsizeheader)                                                         \
+  __ (2, timer1_sel)                                                          \
+  __ (1, _reserved16)                                                         \
+  __ (2, timer0_sel)                                                          \
+  __ (1, use_domain)                                                          \
+  __ (5, rdmts)                                                               \
+  __ (3, desc_type)                                                           \
+  __ (2, _reserved28)                                                         \
+  __ (1, timestamp)                                                           \
+  __ (1, drop_en)
+
+#define iavf_reg_rxdctl_t_fields                                              \
+  __ (5, pthresh)                                                             \
+  __ (3, _reserved5)                                                          \
+  __ (5, hthresh)                                                             \
+  __ (3, _reserved13)                                                         \
+  __ (5, wthresh)                                                             \
+  __ (4, _reserved21)                                                         \
+  __ (1, enable)                                                              \
+  __ (1, swflush)                                                             \
+  __ (5, _reserved27)
+
+#define iavf_reg_eec_t_fields                                                 \
+  __ (6, _reserved0)                                                          \
+  __ (1, flash_in_use)                                                        \
+  __ (1, _reserved7)                                                          \
+  __ (1, ee_pres)                                                             \
+  __ (1, auto_rd)                                                             \
+  __ (1, _reserved10)                                                         \
+  __ (4, ee_size)                                                             \
+  __ (4, pci_ana_done)                                                        \
+  __ (1, flash_detected)                                                      \
+  __ (2, _reserved20)                                                         \
+  __ (1, shadow_modified)                                                     \
+  __ (1, flupd)                                                               \
+  __ (1, _reserved24)                                                         \
+  __ (1, sec1val)                                                             \
+  __ (1, fludone)                                                             \
+  __ (5, _reserved27)
+
+#define iavf_reg_eemngctl_t_fields                                            \
+  __ (11, addr)                                                               \
+  __ (4, _reserved11)                                                         \
+  __ (1, cmd_valid)                                                           \
+  __ (1, write)                                                               \
+  __ (1, eebusy)                                                              \
+  __ (1, cfg_done)                                                            \
+  __ (12, _reserved19)                                                        \
+  __ (1, done)
+
+#define IAVF_REG_STRUCT(n)                                                    \
+  typedef union                                                               \
+  {                                                                           \
+    struct                                                                    \
+    {                                                                         \
+      n##_fields;                                                             \
+    };                                                                        \
+    u32 as_u32;                                                               \
+  } n;                                                                        \
+  STATIC_ASSERT_SIZEOF (n, 4);
+
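+/* Each IAVF_REG_STRUCT invocation below expands one of the *_fields lists
+ * into a bitfield struct overlaid on a u32. As a sketch, the SWSM register
+ * expands to:
+ *
+ *   typedef union {
+ *     struct { u32 smbi : 1; u32 swesmbi : 1; u32 _reserved2 : 30; };
+ *     u32 as_u32;
+ *   } iavf_reg_swsm_t;
+ */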
+#define __(n, f) u32 f : n;
+IAVF_REG_STRUCT (iavf_reg_status_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_ext_t);
+IAVF_REG_STRUCT (iavf_reg_mdic_t);
+IAVF_REG_STRUCT (iavf_reg_rctl_t);
+IAVF_REG_STRUCT (iavf_reg_tctl_t);
+IAVF_REG_STRUCT (iavf_reg_phpm_t);
+IAVF_REG_STRUCT (iavf_reg_manc_t);
+IAVF_REG_STRUCT (iavf_reg_swsm_t);
+IAVF_REG_STRUCT (iavf_reg_fwsm_t);
+IAVF_REG_STRUCT (iavf_reg_sw_fw_sync_t);
+IAVF_REG_STRUCT (iavf_reg_srrctl_t);
+IAVF_REG_STRUCT (iavf_reg_rxdctl_t);
+IAVF_REG_STRUCT (iavf_reg_eec_t);
+IAVF_REG_STRUCT (iavf_reg_eemngctl_t);
+#undef __
+
+#define foreach_iavf_reg                                                      \
+  _ (0x00000, CTRL, iavf_reg_ctrl_t_fields)                                   \
+  _ (0x00008, STATUS, iavf_reg_status_t_fields)                               \
+  _ (0x00018, CTRL_EXT, iavf_reg_ctrl_ext_t_fields)                           \
+  _ (0x00020, MDIC, iavf_reg_mdic_t_fields)                                   \
+  _ (0x00100, RCTL, iavf_reg_rctl_t_fields)                                   \
+  _ (0x00400, TCTL, iavf_reg_tctl_t_fields)                                   \
+  _ (0x00404, TCTL_EXT, )                                                     \
+  _ (0x00e14, PHPM, iavf_reg_phpm_t_fields)                                   \
+  _ (0x01500, ICR, )                                                          \
+  _ (0x0150c, IMC, )                                                          \
+  _ (0x05400, RAL0, )                                                         \
+  _ (0x05404, RAH0, )                                                         \
+  _ (0x05820, MANC, iavf_reg_manc_t_fields)                                   \
+  _ (0x05b50, SWSM, iavf_reg_swsm_t_fields)                                   \
+  _ (0x05b54, FWSM, iavf_reg_fwsm_t_fields)                                   \
+  _ (0x05b5c, SW_FW_SYNC, iavf_reg_sw_fw_sync_t_fields)                       \
+  _ (0x0c000, RDBAL0, )                                                       \
+  _ (0x0c004, RDBAH0, )                                                       \
+  _ (0x0c008, RDLEN0, )                                                       \
+  _ (0x0c00c, SRRCTL0, iavf_reg_srrctl_t_fields)                              \
+  _ (0x0c010, RDH0, )                                                         \
+  _ (0x0c018, RDT0, )                                                         \
+  _ (0x0c028, RXDCTL0, iavf_reg_rxdctl_t_fields)                              \
+  _ (0x12010, EEC, iavf_reg_eec_t_fields)                                     \
+  _ (0x12030, EEMNGCTL, iavf_reg_eemngctl_t_fields)
+
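+/* per-queue RX registers repeat at a 0x40-byte stride from the queue-0
+ * offsets listed above, so the accessors below just scale by queue index */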
+#define IAVF_REG_RDBAL(n)  (IAVF_REG_RDBAL0 + (n) * 0x40)
+#define IAVF_REG_RDBAH(n)  (IAVF_REG_RDBAH0 + (n) * 0x40)
+#define IAVF_REG_RDLEN(n)  (IAVF_REG_RDLEN0 + (n) * 0x40)
+#define IAVF_REG_SRRCTL(n) (IAVF_REG_SRRCTL0 + (n) * 0x40)
+#define IAVF_REG_RDH(n)    (IAVF_REG_RDH0 + (n) * 0x40)
+#define IAVF_REG_RDT(n)    (IAVF_REG_RDT0 + (n) * 0x40)
+#define IAVF_REG_RXDCTL(n) (IAVF_REG_RXDCTL0 + (n) * 0x40)
+
+typedef enum
+{
+#define _(o, n, f) IAVF_REG_##n = (o),
+  foreach_iavf_reg
+#undef _
+} iavf_reg_t;
+
+typedef union
+{
+  struct
+  {
+    u32 intena : 1;
+    u32 clearpba : 1;
+    u32 swint_trig : 1;
+    u32 itr_indx : 2;
+    u32 interval : 12;
+    u32 _rsvd23 : 7;
+    u32 sw_itr_indx_ena : 1;
+    u32 sw_itr_indx : 2;
+    u32 _rsvd29 : 3;
+    u32 wb_on_itr : 1;
+    u32 intena_msk : 1;
+  };
+  u32 as_u32;
+} iavf_dyn_ctln;
+
+STATIC_ASSERT_SIZEOF (iavf_dyn_ctln, 4);
+
+#endif /* _IIAVF_REGS_H_ */
diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c
new file mode 100644 (file)
index 0000000..c0f363c
--- /dev/null
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/iavf_regs.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "port",
+};
+
+static const u8 default_rss_key[] = {
+  0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 0xb6, 0x75, 0xea, 0x5b, 0x12,
+  0x4f, 0x9f, 0x30, 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 0xa0, 0x8c,
+  0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8,
+  0x55, 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 0x81, 0x15, 0x03, 0x66,
+};
+
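+/* interrupt throttling presets written to AVFINT_DYN_CTLN; the INTERVAL
+ * field counts in 2 usec units (as on other i40e-family devices), which is
+ * why the microsecond values below are divided by two */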
+static const iavf_dyn_ctln dyn_ctln_disabled = {};
+static const iavf_dyn_ctln dyn_ctln_enabled = {
+  .clearpba = 1,
+  .interval = IAVF_ITR_INT / 2,
+  .intena = 1,
+};
+static const iavf_dyn_ctln dyn_ctln_wb_on_itr = {
+  .clearpba = 1,
+  .itr_indx = 1,
+  .interval = 32 / 2,
+  .wb_on_itr = 1,
+};
+
+vnet_dev_rv_t
+iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  virtchnl_vlan_caps_t vc;
+  vnet_dev_rv_t rv;
+  u32 outer, inner;
+  const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+  if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0)
+    return iavf_vc_op_disable_vlan_stripping (vm, dev);
+
+  if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
+    return rv;
+
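+  /* only request stripping changes for ethertypes the PF reports as
+   * toggleable; everything else is masked out and left untouched */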
+  outer = vc.offloads.stripping_support.outer;
+  inner = vc.offloads.stripping_support.inner;
+
+  outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
+  inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
+
+  virtchnl_vlan_setting_t vs = {
+    .vport_id = ap->vsi_id,
+    .outer_ethertype_setting = outer,
+    .inner_ethertype_setting = inner,
+  };
+
+  return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs);
+}
+
+vnet_dev_rv_t
+iavf_port_init_rss (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  u16 keylen = clib_min (sizeof (default_rss_key), ap->rss_key_size);
+  u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, keylen)];
+  virtchnl_rss_key_t *key = (virtchnl_rss_key_t *) buffer;
+
+  if (!port->attr.caps.rss)
+    return VNET_DEV_OK;
+
+  /* config RSS key */
+  *key = (virtchnl_rss_key_t){
+    .vsi_id = ap->vsi_id,
+    .key_len = keylen,
+  };
+
+  clib_memcpy (key->key, default_rss_key, sizeof (default_rss_key));
+
+  return iavf_vc_op_config_rss_key (vm, dev, key);
+}
+
+vnet_dev_rv_t
+iavf_port_update_rss_lut (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  u16 lut_size = clib_min (IAVF_MAX_RSS_LUT_SIZE, ap->rss_lut_size);
+  u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, lut_size)];
+  virtchnl_rss_lut_t *lut = (virtchnl_rss_lut_t *) buffer;
+  u32 enabled_rxq_bmp = 0;
+
+  if (!port->attr.caps.rss)
+    return VNET_DEV_OK;
+
+  *lut = (virtchnl_rss_lut_t){
+    .vsi_id = ap->vsi_id,
+    .lut_entries = lut_size,
+  };
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->enabled)
+      enabled_rxq_bmp |= 1ULL << q->queue_id;
+
+  /* config RSS LUT: spread entries round-robin across the enabled rx queues */
+  for (u32 i = 0, j; i < lut->lut_entries;)
+    foreach_set_bit_index (j, enabled_rxq_bmp)
+      {
+       lut->lut[i++] = j;
+       if (i >= lut->lut_entries)
+         break;
+      }
+
+  return iavf_vc_op_config_rss_lut (vm, dev, lut);
+}
+
+vnet_dev_rv_t
+iavf_port_init_vsi_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  virtchnl_queue_pair_info_t *qpi;
+  u16 vsi_id = ap->vsi_id;
+  u16 data_size = vlib_buffer_get_default_data_size (vm);
+  u16 max_frame_size = port->max_frame_size;
+  u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+                            ap->num_qp)];
+  virtchnl_vsi_queue_config_info_t *ci =
+    (virtchnl_vsi_queue_config_info_t *) buffer;
+
+  *ci = (virtchnl_vsi_queue_config_info_t){
+    .num_queue_pairs = ap->num_qp,
+    .vsi_id = vsi_id,
+  };
+
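+  /* the config message carries one entry per queue pair; initialize them
+   * all, then fill in details for the queues backed by actual rx/tx queues */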
+  for (u16 i = 0; i < ap->num_qp; i++)
+    ci->qpair[i] = (virtchnl_queue_pair_info_t){
+      .rxq = { .vsi_id = vsi_id, .queue_id = i },
+      .txq = { .vsi_id = vsi_id, .queue_id = i },
+    };
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (q);
+      qpi = ci->qpair + q->queue_id;
+      qpi->rxq.ring_len = q->size;
+      qpi->rxq.databuffer_size = data_size;
+      qpi->rxq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, arq->descs);
+      qpi->rxq.max_pkt_size = max_frame_size;
+    }
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      iavf_txq_t *atq = vnet_dev_get_tx_queue_data (q);
+      qpi = ci->qpair + q->queue_id;
+      qpi->txq.ring_len = q->size;
+      qpi->txq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, atq->descs);
+    }
+
+  return iavf_vc_op_config_vsi_queues (vm, dev, ci);
+}
+
+vnet_dev_rv_t
+iavf_port_rx_irq_enable_disable (vlib_main_t *vm, vnet_dev_port_t *port,
+                                int enable)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  u16 n_threads = vlib_get_n_threads ();
+  u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, n_threads)];
+  virtchnl_irq_map_info_t *im = (virtchnl_irq_map_info_t *) buffer;
+  vnet_dev_rv_t rv;
+
+  if (port->attr.caps.interrupt_mode)
+    {
+      *im = (virtchnl_irq_map_info_t){
+       .num_vectors = n_threads,
+      };
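+      /* vector 0 is used for the adminq, so rx queue vectors start at 1,
+       * one per worker thread */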
+      for (u16 i = 0; i < im->num_vectors; i++)
+       im->vecmap[i] = (virtchnl_vector_map_t){
+         .vsi_id = ap->vsi_id,
+         .vector_id = i + 1,
+       };
+      if (enable)
+       foreach_vnet_dev_port_rx_queue (rxq, port)
+         if (rxq->enabled)
+           im->vecmap[rxq->rx_thread_index].rxq_map |= 1 << rxq->queue_id;
+    }
+  else
+    {
+      *im = (virtchnl_irq_map_info_t){
+       .num_vectors = 1,
+       .vecmap[0] = {
+           .vsi_id = ap->vsi_id,
+           .vector_id = 1,
+       },
+      };
+      if (enable)
+       foreach_vnet_dev_port_rx_queue (rxq, port)
+         if (rxq->enabled)
+           im->vecmap[0].rxq_map |= 1 << rxq->queue_id;
+    }
+
+  if ((rv = iavf_vc_op_config_irq_map (vm, dev, im)))
+    return rv;
+
+  for (int i = 0; i < im->num_vectors; i++)
+    {
+      u32 val;
+
+      if (enable == 0)
+       val = dyn_ctln_disabled.as_u32;
+      else if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+       val = dyn_ctln_wb_on_itr.as_u32;
+      else
+       val = dyn_ctln_enabled.as_u32;
+
+      iavf_reg_write (ad, AVFINT_DYN_CTLN (im->vecmap[i].vector_id), val);
+    }
+
+  return rv;
+}
+
+vnet_dev_rv_t
+iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_rv_t rv;
+
+  log_debug (port->dev, "port %u", port->port_id);
+
+  if ((rv = iavf_port_vlan_strip_disable (vm, port)))
+    return rv;
+
+  if ((rv = iavf_port_init_rss (vm, port)))
+    return rv;
+
+  if (port->dev->poll_stats)
+    iavf_port_add_counters (vm, port);
+
+  return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+iavf_enable_disable_queues (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+  virtchnl_queue_select_t qs = {
+    .vsi_id = ap->vsi_id,
+  };
+
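+  /* when enabling, select the queues marked enabled; when disabling, select
+   * whatever was actually started */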
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if ((enable && q->enabled) || (!enable && q->started))
+      qs.rx_queues |= 1ULL << q->queue_id;
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    if ((enable && q->enabled) || (!enable && q->started))
+      qs.tx_queues |= 1ULL << q->queue_id;
+
+  return enable ? iavf_vc_op_enable_queues (vm, port->dev, &qs) :
+                       iavf_vc_op_disable_queues (vm, port->dev, &qs);
+}
+
+vnet_dev_rv_t
+iavf_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_rv_t rv;
+
+  log_debug (port->dev, "port %u", port->port_id);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->enabled)
+      if ((rv = iavf_rx_queue_start (vm, q)))
+       goto done;
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    if ((rv = iavf_tx_queue_start (vm, q)))
+      goto done;
+
+  if ((rv = iavf_port_update_rss_lut (vm, port)))
+    goto done;
+
+  /* configure qpairs */
+  if ((rv = iavf_port_init_vsi_queues (vm, port)))
+    goto done;
+
+  if ((rv = iavf_port_rx_irq_enable_disable (vm, port, /* enable */ 1)))
+    goto done;
+
+  if ((rv = iavf_enable_disable_queues (vm, port, 1)))
+    goto done;
+
+  if (port->dev->poll_stats)
+    vnet_dev_poll_port_add (vm, port, 1, iavf_port_poll_stats);
+
+done:
+  if (rv)
+    {
+      foreach_vnet_dev_port_rx_queue (q, port)
+       iavf_rx_queue_stop (vm, q);
+      foreach_vnet_dev_port_tx_queue (q, port)
+       iavf_tx_queue_stop (vm, q);
+    }
+  return rv;
+}
+
+void
+iavf_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  log_debug (port->dev, "port %u", port->port_id);
+
+  iavf_enable_disable_queues (vm, port, /* enable */ 0);
+  iavf_port_rx_irq_enable_disable (vm, port, /* enable */ 0);
+
+  if (port->dev->poll_stats)
+    vnet_dev_poll_port_remove (vm, port, iavf_port_poll_stats);
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    iavf_rx_queue_stop (vm, rxq);
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    iavf_tx_queue_stop (vm, txq);
+
+  vnet_dev_port_state_change (vm, port,
+                             (vnet_dev_port_state_changes_t){
+                               .change.link_state = 1,
+                               .change.link_speed = 1,
+                               .link_speed = 0,
+                               .link_state = 0,
+                             });
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+                              vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      if (port->started)
+       rv = VNET_DEV_ERR_PORT_STARTED;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      break;
+
+    default:
+      rv = VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  return rv;
+}
+
+static vnet_dev_rv_t
+iavf_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_hw_addr_t *addr, int is_add,
+                           int is_primary)
+{
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  virtchnl_ether_addr_list_t al = {
+    .vsi_id = ap->vsi_id,
+    .num_elements = 1,
+    .list[0].primary = is_primary ? 1 : 0,
+    .list[0].extra = is_primary ? 0 : 1,
+  };
+
+  clib_memcpy (al.list[0].addr, addr, sizeof (al.list[0].addr));
+
+  return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, &al) :
+                       iavf_vc_op_del_eth_addr (vm, port->dev, &al);
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                     vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_t *dev = port->dev;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      {
+       virtchnl_promisc_info_t pi = {
+         .vsi_id = ap->vsi_id,
+         .unicast_promisc = req->promisc,
+         .multicast_promisc = req->promisc,
+       };
+
+       rv = iavf_vc_op_config_promisc_mode (vm, dev, &pi);
+      }
+      break;
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      rv = iavf_port_add_del_eth_addr (vm, port, &port->primary_hw_addr,
+                                      /* is_add */ 0,
+                                      /* is_primary */ 1);
+      if (rv == VNET_DEV_OK)
+       rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+                                        /* is_add */ 1,
+                                        /* is_primary */ 1);
+      break;
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+                                      /* is_add */ 1,
+                                      /* is_primary */ 0);
+      break;
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+                                      /* is_add */ 0,
+                                      /* is_primary */ 0);
+      break;
+
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      break;
+
+    default:
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  return rv;
+}
diff --git a/src/plugins/dev_iavf/queue.c b/src/plugins/dev_iavf/queue.c
new file mode 100644 (file)
index 0000000..113c0db
--- /dev/null
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "queue",
+};
+
+vnet_dev_rv_t
+iavf_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *port = rxq->port;
+  vnet_dev_t *dev = port->dev;
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_rv_t rv;
+
+  arq->buffer_indices = clib_mem_alloc_aligned (
+    rxq->size * sizeof (arq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+
+  if ((rv =
+        vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_rx_desc_t) * rxq->size,
+                                0, (void **) &arq->descs)))
+    return rv;
+
+  arq->qrx_tail = ad->bar0 + IAVF_QRX_TAIL (rxq->queue_id);
+
+  log_debug (dev, "queue %u alocated", rxq->queue_id);
+  return rv;
+}
+
+void
+iavf_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_t *dev = rxq->port->dev;
+  iavf_rxq_t *aq = vnet_dev_get_rx_queue_data (rxq);
+
+  log_debug (dev, "queue %u", rxq->queue_id);
+
+  vnet_dev_dma_mem_free (vm, dev, aq->descs);
+
+  foreach_pointer (p, aq->buffer_indices)
+    if (p)
+      clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_t *dev = txq->port->dev;
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+  vnet_dev_rv_t rv;
+
+  if ((rv =
+        vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_tx_desc_t) * txq->size,
+                                0, (void **) &atq->descs)))
+    return rv;
+
+  clib_ring_new_aligned (atq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+  atq->buffer_indices = clib_mem_alloc_aligned (
+    txq->size * sizeof (atq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+  atq->tmp_descs = clib_mem_alloc_aligned (
+    sizeof (atq->tmp_descs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+  atq->tmp_bufs = clib_mem_alloc_aligned (
+    sizeof (atq->tmp_bufs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+
+  atq->qtx_tail = ad->bar0 + IAVF_QTX_TAIL (txq->queue_id);
+
+  log_debug (dev, "queue %u alocated", txq->queue_id);
+  return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_t *dev = txq->port->dev;
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+
+  log_debug (dev, "queue %u", txq->queue_id);
+  vnet_dev_dma_mem_free (vm, dev, atq->descs);
+  clib_ring_free (atq->rs_slots);
+
+  foreach_pointer (p, atq->tmp_descs, atq->tmp_bufs, atq->buffer_indices)
+    if (p)
+      clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_t *dev = rxq->port->dev;
+  iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+  iavf_rx_desc_t *d = arq->descs;
+  u32 n_enq, *bi = arq->buffer_indices;
+  u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+
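+  /* start with up to size - 8 buffers so the ring never becomes
+   * completely full */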
+  n_enq = vlib_buffer_alloc_from_pool (vm, bi, rxq->size - 8, bpi);
+
+  if (n_enq < 8)
+    {
+      if (n_enq)
+       vlib_buffer_free (vm, bi, n_enq);
+      return VNET_DEV_ERR_BUFFER_ALLOC_FAIL;
+    }
+
+  for (u32 i = 0; i < n_enq; i++)
+    {
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi[i]);
+      u64 dma_addr = vnet_dev_get_dma_addr (vm, dev, b->data);
+      d[i] = (iavf_rx_desc_t){ .addr = dma_addr };
+    }
+
+  arq->n_enqueued = n_enq;
+  arq->next = 0;
+  __atomic_store_n (arq->qrx_tail, n_enq, __ATOMIC_RELEASE);
+  return VNET_DEV_OK;
+}
+
+void
+iavf_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+
+  __atomic_store_n (arq->qrx_tail, 0, __ATOMIC_RELAXED);
+  if (arq->n_enqueued)
+    {
+      vlib_buffer_free_from_ring_no_next (vm, arq->buffer_indices, arq->next,
+                                         rxq->size, arq->n_enqueued);
+      log_debug (rxq->port->dev, "%u buffers freed from rx queue %u",
+                arq->n_enqueued, rxq->queue_id);
+    }
+  arq->n_enqueued = arq->next = 0;
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+  atq->next = 0;
+  atq->n_enqueued = 0;
+  clib_ring_reset (atq->rs_slots);
+  __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+  return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+
+  log_debug (txq->port->dev, "queue %u", txq->queue_id);
+
+  __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+  if (atq->n_enqueued)
+    {
+      vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices,
+                                         atq->next - atq->n_enqueued,
+                                         txq->size, atq->n_enqueued);
+      log_debug (txq->port->dev, "%u buffers freed from tx queue %u",
+                atq->n_enqueued, txq->queue_id);
+    }
+  atq->n_enqueued = atq->next = 0;
+}
diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c
new file mode 100644 (file)
index 0000000..946adb1
--- /dev/null
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_iavf/iavf.h>
+
+#define IAVF_RX_REFILL_THRESHOLD 32
+
+static const iavf_rx_desc_qw1_t mask_eop = { .eop = 1 };
+static const iavf_rx_desc_qw1_t mask_flm = { .flm = 1 };
+static const iavf_rx_desc_qw1_t mask_dd = { .dd = 1 };
+static const iavf_rx_desc_qw1_t mask_ipe = { .ipe = 1 };
+static const iavf_rx_desc_qw1_t mask_dd_eop = { .dd = 1, .eop = 1 };
+
+static_always_inline int
+iavf_rxd_is_not_eop (iavf_rx_desc_t *d)
+{
+  return (d->qw1.as_u64 & mask_eop.as_u64) == 0;
+}
+
+static_always_inline int
+iavf_rxd_is_not_dd (iavf_rx_desc_t *d)
+{
+  return (d->qw1.as_u64 & mask_dd.as_u64) == 0;
+}
+
+static_always_inline void
+iavf_rx_desc_write (iavf_rx_desc_t *d, u64 addr)
+{
+#ifdef CLIB_HAVE_VEC256
+  *(u64x4 *) d = (u64x4){ addr, 0, 0, 0 };
+#else
+  d->qword[0] = addr;
+  d->qword[1] = 0;
+#endif
+}
+
+static_always_inline void
+iavf_rxq_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
+                vnet_dev_rx_queue_t *rxq, int use_va_dma)
+{
+  u16 n_refill, mask, n_alloc, slot, size;
+  iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+  vlib_buffer_t *b[8];
+  iavf_rx_desc_t *d, *first_d;
+  void *p[8];
+
+  size = rxq->size;
+  mask = size - 1;
+  n_refill = mask - arq->n_enqueued;
+  if (PREDICT_TRUE (n_refill <= IAVF_RX_REFILL_THRESHOLD))
+    return;
+
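+  /* the first slot to refill sits right after the last enqueued descriptor,
+   * i.e. (next + n_enqueued) mod size; since n_refill is at most mask -
+   * n_enqueued, one slot always stays empty */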
+  slot = (arq->next - n_refill - 1) & mask;
+
+  n_refill &= ~7; /* round to 8 */
+  n_alloc = vlib_buffer_alloc_to_ring_from_pool (
+    vm, arq->buffer_indices, slot, size, n_refill,
+    vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+  if (PREDICT_FALSE (n_alloc != n_refill))
+    {
+      vlib_error_count (vm, node->node_index, IAVF_RX_NODE_CTR_BUFFER_ALLOC,
+                       1);
+      if (n_alloc)
+       vlib_buffer_free_from_ring (vm, arq->buffer_indices, slot, size,
+                                   n_alloc);
+      return;
+    }
+
+  arq->n_enqueued += n_alloc;
+  first_d = arq->descs;
+
+  ASSERT (slot % 8 == 0);
+
+  while (n_alloc >= 8)
+    {
+      d = first_d + slot;
+
+      if (use_va_dma)
+       {
+         vlib_get_buffers_with_offset (vm, arq->buffer_indices + slot, p, 8,
+                                       sizeof (vlib_buffer_t));
+         iavf_rx_desc_write (d + 0, pointer_to_uword (p[0]));
+         iavf_rx_desc_write (d + 1, pointer_to_uword (p[1]));
+         iavf_rx_desc_write (d + 2, pointer_to_uword (p[2]));
+         iavf_rx_desc_write (d + 3, pointer_to_uword (p[3]));
+         iavf_rx_desc_write (d + 4, pointer_to_uword (p[4]));
+         iavf_rx_desc_write (d + 5, pointer_to_uword (p[5]));
+         iavf_rx_desc_write (d + 6, pointer_to_uword (p[6]));
+         iavf_rx_desc_write (d + 7, pointer_to_uword (p[7]));
+       }
+      else
+       {
+         vlib_get_buffers (vm, arq->buffer_indices + slot, b, 8);
+         iavf_rx_desc_write (d + 0, vlib_buffer_get_pa (vm, b[0]));
+         iavf_rx_desc_write (d + 1, vlib_buffer_get_pa (vm, b[1]));
+         iavf_rx_desc_write (d + 2, vlib_buffer_get_pa (vm, b[2]));
+         iavf_rx_desc_write (d + 3, vlib_buffer_get_pa (vm, b[3]));
+         iavf_rx_desc_write (d + 4, vlib_buffer_get_pa (vm, b[4]));
+         iavf_rx_desc_write (d + 5, vlib_buffer_get_pa (vm, b[5]));
+         iavf_rx_desc_write (d + 6, vlib_buffer_get_pa (vm, b[6]));
+         iavf_rx_desc_write (d + 7, vlib_buffer_get_pa (vm, b[7]));
+       }
+
+      /* next */
+      slot = (slot + 8) & mask;
+      n_alloc -= 8;
+    }
+
+  __atomic_store_n (arq->qrx_tail, slot, __ATOMIC_RELEASE);
+}
+
+static_always_inline uword
+iavf_rx_attach_tail (vlib_main_t *vm, vlib_buffer_template_t *bt,
+                    vlib_buffer_t *b, u64 qw1, iavf_rx_tail_t *t)
+{
+  vlib_buffer_t *hb = b;
+  u32 tlnifb = 0, i = 0;
+
+  if (qw1 & mask_eop.as_u64)
+    return 0;
+
+  while ((qw1 & mask_eop.as_u64) == 0)
+    {
+      ASSERT (i < IAVF_RX_MAX_DESC_IN_CHAIN - 1);
+      ASSERT (qw1 & mask_dd.as_u64);
+      qw1 = t->qw1s[i];
+      b->next_buffer = t->buffers[i];
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+      b = vlib_get_buffer (vm, b->next_buffer);
+      b->template = *bt;
+      tlnifb += b->current_length = ((iavf_rx_desc_qw1_t) qw1).length;
+      i++;
+    }
+
+  hb->total_length_not_including_first_buffer = tlnifb;
+  hb->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  return tlnifb;
+}
+
+static_always_inline void
+iavf_process_flow_offload (vnet_dev_port_t *port, iavf_rt_data_t *rtd,
+                          uword n_rx_packets)
+{
+  uword n;
+  iavf_flow_lookup_entry_t fle;
+  iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+  for (n = 0; n < n_rx_packets; n++)
+    {
+      if ((rtd->qw1s[n] & mask_flm.as_u64) == 0)
+       continue;
+
+      fle = *pool_elt_at_index (ap->flow_lookup_entries, rtd->flow_ids[n]);
+
+      if (fle.next_index != (u16) ~0)
+       rtd->next[n] = fle.next_index;
+
+      if (fle.flow_id != ~0)
+       rtd->bufs[n]->flow_id = fle.flow_id;
+
+      if (fle.buffer_advance != ~0)
+       vlib_buffer_advance (rtd->bufs[n], fle.buffer_advance);
+    }
+}
+
+static_always_inline uword
+iavf_process_rx_burst (vlib_main_t *vm, vlib_node_runtime_t *node,
+                      vnet_dev_rx_queue_t *rxq, iavf_rt_data_t *rtd,
+                      vlib_buffer_template_t *bt, u32 n_left,
+                      int maybe_multiseg)
+{
+  vlib_buffer_t **b = rtd->bufs;
+  u64 *qw1 = rtd->qw1s;
+  iavf_rx_tail_t *tail = rtd->tails;
+  uword n_rx_bytes = 0;
+
+  while (n_left >= 4)
+    {
+      if (n_left >= 12)
+       {
+         vlib_prefetch_buffer_header (b[8], LOAD);
+         vlib_prefetch_buffer_header (b[9], LOAD);
+         vlib_prefetch_buffer_header (b[10], LOAD);
+         vlib_prefetch_buffer_header (b[11], LOAD);
+       }
+
+      b[0]->template = *bt;
+      b[1]->template = *bt;
+      b[2]->template = *bt;
+      b[3]->template = *bt;
+
+      n_rx_bytes += b[0]->current_length =
+       ((iavf_rx_desc_qw1_t) qw1[0]).length;
+      n_rx_bytes += b[1]->current_length =
+       ((iavf_rx_desc_qw1_t) qw1[1]).length;
+      n_rx_bytes += b[2]->current_length =
+       ((iavf_rx_desc_qw1_t) qw1[2]).length;
+      n_rx_bytes += b[3]->current_length =
+       ((iavf_rx_desc_qw1_t) qw1[3]).length;
+
+      if (maybe_multiseg)
+       {
+         n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+         n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1);
+         n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2);
+         n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3);
+       }
+
+      /* next */
+      qw1 += 4;
+      tail += 4;
+      b += 4;
+      n_left -= 4;
+    }
+
+  while (n_left)
+    {
+      b[0]->template = *bt;
+
+      n_rx_bytes += b[0]->current_length =
+       ((iavf_rx_desc_qw1_t) qw1[0]).length;
+
+      if (maybe_multiseg)
+       n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+
+      /* next */
+      qw1 += 1;
+      tail += 1;
+      b += 1;
+      n_left -= 1;
+    }
+  return n_rx_bytes;
+}
+
+static_always_inline uword
+iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                         vlib_frame_t *frame, vnet_dev_port_t *port,
+                         vnet_dev_rx_queue_t *rxq, int with_flows)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 thr_idx = vlib_get_thread_index ();
+  iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm);
+  iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+  vlib_buffer_template_t bt = rxq->buffer_template;
+  u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0;
+  u16 n_tail_desc = 0;
+  u64 or_qw1 = 0;
+  u32 *bi, *to_next, n_left_to_next;
+  u32 next_index = rxq->next_index;
+  u32 sw_if_index = port->intf.sw_if_index;
+  u32 hw_if_index = port->intf.hw_if_index;
+  u16 next = arq->next;
+  u16 size = rxq->size;
+  u16 mask = size - 1;
+  iavf_rx_desc_t *d, *descs = arq->descs;
+#ifdef CLIB_HAVE_VEC256
+  u64x4 q1x4, or_q1x4 = { 0 };
+  u32x4 fdidx4;
+  u64x4 dd_eop_mask4 = u64x4_splat (mask_dd_eop.as_u64);
+#elif defined(CLIB_HAVE_VEC128)
+  u32x4 q1x4_lo, q1x4_hi, or_q1x4 = { 0 };
+  u32x4 fdidx4;
+  u32x4 dd_eop_mask4 = u32x4_splat (mask_dd_eop.as_u64);
+#endif
+  int single_next = 1;
+
+  /* is there anything on the ring */
+  d = descs + next;
+  if ((d->qword[1] & mask_dd.as_u64) == 0)
+    goto done;
+
+  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+  /* fetch up to IAVF_RX_VECTOR_SZ descriptors from the rx ring, resolve
+     buffer chains and copy the needed data from each descriptor into the rx
+     vector */
+  bi = to_next;
+
+  while (n_rx_packets < IAVF_RX_VECTOR_SZ)
+    {
+      if (next + 11 < size)
+       {
+         int stride = 8;
+         clib_prefetch_load ((void *) (descs + (next + stride)));
+         clib_prefetch_load ((void *) (descs + (next + stride + 1)));
+         clib_prefetch_load ((void *) (descs + (next + stride + 2)));
+         clib_prefetch_load ((void *) (descs + (next + stride + 3)));
+       }
+
+#ifdef CLIB_HAVE_VEC256
+      if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+       goto one_by_one;
+
+      q1x4 = u64x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+                          (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+      /* not all packets are ready or at least one of them is chained */
+      if (!u64x4_is_equal (q1x4 & dd_eop_mask4, dd_eop_mask4))
+       goto one_by_one;
+
+      or_q1x4 |= q1x4;
+
+      u64x4_store_unaligned (q1x4, rtd->qw1s + n_rx_packets);
+#elif defined(CLIB_HAVE_VEC128)
+      if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+       goto one_by_one;
+
+      q1x4_lo =
+       u32x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+                     (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+      /* not all packets are ready or at least one of them is chained */
+      if (!u32x4_is_equal (q1x4_lo & dd_eop_mask4, dd_eop_mask4))
+       goto one_by_one;
+
+      q1x4_hi = u32x4_gather (
+       (void *) &d[0].qword[1] + 4, (void *) &d[1].qword[1] + 4,
+       (void *) &d[2].qword[1] + 4, (void *) &d[3].qword[1] + 4);
+
+      or_q1x4 |= q1x4_lo;
+      rtd->qw1s[n_rx_packets + 0] = (u64) q1x4_hi[0] << 32 | (u64) q1x4_lo[0];
+      rtd->qw1s[n_rx_packets + 1] = (u64) q1x4_hi[1] << 32 | (u64) q1x4_lo[1];
+      rtd->qw1s[n_rx_packets + 2] = (u64) q1x4_hi[2] << 32 | (u64) q1x4_lo[2];
+      rtd->qw1s[n_rx_packets + 3] = (u64) q1x4_hi[3] << 32 | (u64) q1x4_lo[3];
+#endif
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+
+      if (with_flows)
+       {
+         fdidx4 = u32x4_gather (
+           (void *) &d[0].fdid_flex_hi, (void *) &d[1].fdid_flex_hi,
+           (void *) &d[2].fdid_flex_hi, (void *) &d[3].fdid_flex_hi);
+         u32x4_store_unaligned (fdidx4, rtd->flow_ids + n_rx_packets);
+       }
+
+      vlib_buffer_copy_indices (bi, arq->buffer_indices + next, 4);
+
+      /* next */
+      next = (next + 4) & mask;
+      d = descs + next;
+      n_rx_packets += 4;
+      bi += 4;
+      continue;
+    one_by_one:
+#endif
+      clib_prefetch_load ((void *) (descs + ((next + 8) & mask)));
+
+      if (iavf_rxd_is_not_dd (d))
+       break;
+
+      bi[0] = arq->buffer_indices[next];
+
+      /* deal with chained buffers */
+      if (PREDICT_FALSE (iavf_rxd_is_not_eop (d)))
+       {
+         u16 tail_desc = 0;
+         u16 tail_next = next;
+         iavf_rx_tail_t *tail = rtd->tails + n_rx_packets;
+         iavf_rx_desc_t *td;
+         do
+           {
+             tail_next = (tail_next + 1) & mask;
+             td = descs + tail_next;
+
+             /* bail out in case of incomplete transaction */
+             if (iavf_rxd_is_not_dd (td))
+               goto no_more_desc;
+
+             or_qw1 |= tail->qw1s[tail_desc] = td[0].qword[1];
+             tail->buffers[tail_desc] = arq->buffer_indices[tail_next];
+             tail_desc++;
+           }
+         while (iavf_rxd_is_not_eop (td));
+         next = tail_next;
+         n_tail_desc += tail_desc;
+       }
+
+      or_qw1 |= rtd->qw1s[n_rx_packets] = d[0].qword[1];
+      if (PREDICT_FALSE (with_flows))
+       {
+         rtd->flow_ids[n_rx_packets] = d[0].fdid_flex_hi;
+       }
+
+      /* next */
+      next = (next + 1) & mask;
+      d = descs + next;
+      n_rx_packets++;
+      bi++;
+    }
+no_more_desc:
+
+  if (n_rx_packets == 0)
+    goto done;
+
+  arq->next = next;
+  arq->n_enqueued -= n_rx_packets + n_tail_desc;
+
+  /* avoid eating our own tail: clear qword[1] of the first descriptor past
+     the enqueued region so a stale DD bit is never read as a completion */
+  arq->descs[(next + arq->n_enqueued) & mask].qword[1] = 0;
+
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+  or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
+#endif
+
+  vlib_get_buffers (vm, to_next, rtd->bufs, n_rx_packets);
+
+  n_rx_bytes =
+    n_tail_desc ?
+           iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 1) :
+           iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 0);
+
+  /* the MARKed packets may have different next nodes */
+  if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+    {
+      u32 n;
+      single_next = 0;
+      for (n = 0; n < n_rx_packets; n++)
+       rtd->next[n] = next_index;
+
+      iavf_process_flow_offload (port, rtd, n_rx_packets);
+    }
+
+  /* packet trace if enabled */
+  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+    {
+      u32 n_left = n_rx_packets;
+      u32 i, j;
+      u16 *next_indices = rtd->next;
+
+      i = 0;
+      while (n_trace && n_left)
+       {
+         vlib_buffer_t *b = rtd->bufs[i];
+         if (PREDICT_FALSE (single_next == 0))
+           next_index = next_indices[0];
+
+         if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
+                                              /* follow_chain */ 0)))
+           {
+             iavf_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+             tr->next_index = next_index;
+             tr->qid = rxq->queue_id;
+             tr->hw_if_index = hw_if_index;
+             tr->qw1s[0] = rtd->qw1s[i];
+             tr->flow_id =
+               (tr->qw1s[0] & mask_flm.as_u64) ? rtd->flow_ids[i] : 0;
+             for (j = 1; j < IAVF_RX_MAX_DESC_IN_CHAIN; j++)
+               tr->qw1s[j] = rtd->tails[i].qw1s[j - 1];
+
+             n_trace--;
+           }
+
+         /* next */
+         n_left--;
+         i++;
+         next_indices++;
+       }
+      vlib_set_trace_count (vm, node, n_trace);
+    }
+
+  /* enqueue the packets to the next nodes */
+  if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+    {
+      /* release the next node's frame vector; in this case
+        vlib_buffer_enqueue_to_next places the packets, as they may have
+        different next nodes */
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+      /* enqueue buffers to the next node */
+      vlib_buffer_enqueue_to_next (vm, node, to_next, rtd->next, n_rx_packets);
+    }
+  else
+    {
+      if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+       {
+         vlib_next_frame_t *nf;
+         vlib_frame_t *f;
+         ethernet_input_frame_t *ef;
+         nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+         f = vlib_get_frame (vm, nf->frame);
+         f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+         ef = vlib_frame_scalar_args (f);
+         ef->sw_if_index = sw_if_index;
+         ef->hw_if_index = hw_if_index;
+
+         if ((or_qw1 & mask_ipe.as_u64) == 0)
+           f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+         vlib_frame_no_append (f);
+       }
+
+      n_left_to_next -= n_rx_packets;
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_increment_combined_counter (
+    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+    thr_idx, hw_if_index, n_rx_packets, n_rx_bytes);
+
+done:
+  return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (iavf_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_rx = 0;
+  foreach_vnet_dev_rx_queue_runtime (rxq, node)
+    {
+      vnet_dev_port_t *port = rxq->port;
+      iavf_port_t *ap = vnet_dev_get_port_data (port);
+      if (PREDICT_FALSE (ap->flow_offload))
+       n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 1);
+      else
+       n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 0);
+
+      /* refill rx ring */
+      if (rxq->port->dev->va_dma)
+       iavf_rxq_refill (vm, node, rxq, 1 /* use_va_dma */);
+      else
+       iavf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */);
+    }
+
+  return n_rx;
+}
diff --git a/src/plugins/dev_iavf/tx_node.c b/src/plugins/dev_iavf/tx_node.c
new file mode 100644 (file)
index 0000000..451db80
--- /dev/null
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_iavf/iavf.h>
+
+static_always_inline u8
+iavf_tx_desc_get_dtyp (iavf_tx_desc_t *d)
+{
+  return d->qword[1] & 0x0f;
+}
+
+struct iavf_ip4_psh
+{
+  u32 src;
+  u32 dst;
+  u8 zero;
+  u8 proto;
+  u16 l4len;
+};
+
+struct iavf_ip6_psh
+{
+  ip6_address_t src;
+  ip6_address_t dst;
+  u32 l4len;
+  u32 proto;
+};
+
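+/* the hw checksum offload expects the L4 checksum field to be seeded with
+ * the pseudo-header sum; compute it from the headers here and let the nic
+ * finish the job */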
+static_always_inline u64
+iavf_tx_prepare_cksum (vlib_buffer_t *b, u8 is_tso)
+{
+  u64 flags = 0;
+  if (!is_tso && !(b->flags & VNET_BUFFER_F_OFFLOAD))
+    return 0;
+
+  vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+  u32 is_tcp = is_tso || oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+  u32 is_udp = !is_tso && oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+
+  if (!is_tcp && !is_udp)
+    return 0;
+
+  u32 is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
+  u32 is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
+
+  ASSERT (!(is_tcp && is_udp));
+  ASSERT (is_ip4 || is_ip6);
+  i16 l2_hdr_offset = b->current_data;
+  i16 l3_hdr_offset = vnet_buffer (b)->l3_hdr_offset;
+  i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
+  u16 l2_len = l3_hdr_offset - l2_hdr_offset;
+  u16 l3_len = l4_hdr_offset - l3_hdr_offset;
+  ip4_header_t *ip4 = (void *) (b->data + l3_hdr_offset);
+  ip6_header_t *ip6 = (void *) (b->data + l3_hdr_offset);
+  tcp_header_t *tcp = (void *) (b->data + l4_hdr_offset);
+  udp_header_t *udp = (void *) (b->data + l4_hdr_offset);
+  u16 l4_len = is_tcp ? tcp_header_bytes (tcp) : sizeof (udp_header_t);
+  u16 sum = 0;
+
+  flags |= IAVF_TXD_OFFSET_MACLEN (l2_len) | IAVF_TXD_OFFSET_IPLEN (l3_len) |
+          IAVF_TXD_OFFSET_L4LEN (l4_len);
+  flags |= is_ip4 ? IAVF_TXD_CMD_IIPT_IPV4 : IAVF_TXD_CMD_IIPT_IPV6;
+  flags |= is_tcp ? IAVF_TXD_CMD_L4T_TCP : IAVF_TXD_CMD_L4T_UDP;
+
+  if (is_ip4)
+    ip4->checksum = 0;
+
+  if (is_tso)
+    {
+      if (is_ip4)
+       ip4->length = 0;
+      else
+       ip6->payload_length = 0;
+    }
+
+  if (is_ip4)
+    {
+      struct iavf_ip4_psh psh = { 0 };
+      psh.src = ip4->src_address.as_u32;
+      psh.dst = ip4->dst_address.as_u32;
+      psh.proto = ip4->protocol;
+      psh.l4len = is_tso ?
+                         0 :
+                         clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+                                         (l4_hdr_offset - l3_hdr_offset));
+      sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+    }
+  else
+    {
+      struct iavf_ip6_psh psh = { 0 };
+      psh.src = ip6->src_address;
+      psh.dst = ip6->dst_address;
+      psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+      psh.l4len = is_tso ? 0 : ip6->payload_length;
+      sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+    }
+
+  if (is_tcp)
+    tcp->checksum = sum;
+  else
+    udp->checksum = sum;
+  return flags;
+}
+
+static_always_inline u32
+iavf_tx_fill_ctx_desc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+                      iavf_tx_desc_t *d, vlib_buffer_t *b)
+{
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+  vlib_buffer_t *ctx_ph;
+  u32 *bi = atq->ph_bufs;
+
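+  /* buffer ref_count is a u8; if this placeholder buffer is already at 255
+   * references, fall through to the next preallocated one */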
+next:
+  ctx_ph = vlib_get_buffer (vm, bi[0]);
+  if (PREDICT_FALSE (ctx_ph->ref_count == 255))
+    {
+      bi++;
+      goto next;
+    }
+
+  /* Acquire a reference on the placeholder buffer */
+  ctx_ph->ref_count++;
+
+  u16 l234hdr_sz = vnet_buffer (b)->l4_hdr_offset - b->current_data +
+                  vnet_buffer2 (b)->gso_l4_hdr_sz;
+  u16 tlen = vlib_buffer_length_in_chain (vm, b) - l234hdr_sz;
+  d[0].qword[0] = 0;
+  d[0].qword[1] = IAVF_TXD_DTYP_CTX | IAVF_TXD_CTX_CMD_TSO |
+                 IAVF_TXD_CTX_SEG_MSS (vnet_buffer2 (b)->gso_size) |
+                 IAVF_TXD_CTX_SEG_TLEN (tlen);
+  return bi[0];
+}
+
+static_always_inline void
+iavf_tx_copy_desc (iavf_tx_desc_t *d, iavf_tx_desc_t *s, u32 n_descs)
+{
+#if defined CLIB_HAVE_VEC512
+  while (n_descs >= 8)
+    {
+      u64x8u *dv = (u64x8u *) d;
+      u64x8u *sv = (u64x8u *) s;
+
+      dv[0] = sv[0];
+      dv[1] = sv[1];
+
+      /* next */
+      d += 8;
+      s += 8;
+      n_descs -= 8;
+    }
+#elif defined CLIB_HAVE_VEC256
+  while (n_descs >= 4)
+    {
+      u64x4u *dv = (u64x4u *) d;
+      u64x4u *sv = (u64x4u *) s;
+
+      dv[0] = sv[0];
+      dv[1] = sv[1];
+
+      /* next */
+      d += 4;
+      s += 4;
+      n_descs -= 4;
+    }
+#elif defined CLIB_HAVE_VEC128
+  while (n_descs >= 2)
+    {
+      u64x2u *dv = (u64x2u *) d;
+      u64x2u *sv = (u64x2u *) s;
+
+      dv[0] = sv[0];
+      dv[1] = sv[1];
+
+      /* next */
+      d += 2;
+      s += 2;
+      n_descs -= 2;
+    }
+#endif
+  while (n_descs)
+    {
+      d[0].qword[0] = s[0].qword[0];
+      d[0].qword[1] = s[0].qword[1];
+      d++;
+      s++;
+      n_descs--;
+    }
+}
+
+static_always_inline void
+iavf_tx_fill_data_desc (vlib_main_t *vm, iavf_tx_desc_t *d, vlib_buffer_t *b,
+                       u64 cmd, int use_va_dma)
+{
+  if (use_va_dma)
+    d->qword[0] = vlib_buffer_get_current_va (b);
+  else
+    d->qword[0] = vlib_buffer_get_current_pa (vm, b);
+  d->qword[1] = (((u64) b->current_length) << 34 | cmd | IAVF_TXD_CMD_RSV);
+}
+static_always_inline u16
+iavf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node,
+                vnet_dev_tx_queue_t *txq, u32 *buffers, u32 n_packets,
+                u16 *n_enq_descs, int use_va_dma)
+{
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+  const u64 cmd_eop = IAVF_TXD_CMD_EOP;
+  u16 n_free_desc, n_desc_left, n_packets_left = n_packets;
+#if defined CLIB_HAVE_VEC512
+  vlib_buffer_t *b[8];
+#else
+  vlib_buffer_t *b[4];
+#endif
+  iavf_tx_desc_t *d = atq->tmp_descs;
+  u32 *tb = atq->tmp_bufs;
+
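+  /* descriptors and buffer indices are staged in linear scratch arrays and
+   * copied onto the ring afterwards, so wrap-around is handled only once at
+   * copy time */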
+  n_free_desc = n_desc_left = txq->size - atq->n_enqueued - 8;
+
+  if (n_desc_left == 0)
+    return 0;
+
+  while (n_packets_left && n_desc_left)
+    {
+#if defined CLIB_HAVE_VEC512
+      u32 flags;
+      u64x8 or_flags_vec512;
+      u64x8 flags_mask_vec512;
+#else
+      u32 flags, or_flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+      if (n_packets_left < 8 || n_desc_left < 8)
+#else
+      if (n_packets_left < 8 || n_desc_left < 4)
+#endif
+       goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+      u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start);
+      u32x8 buf_indices = u32x8_load_unaligned (buffers);
+
+      *(u64x8 *) &b = base_ptr + u64x8_from_u32x8 (
+                                  buf_indices << CLIB_LOG2_CACHE_LINE_BYTES);
+
+      or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1);
+#else
+      vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
+      vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
+      vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
+      vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD);
+
+      b[0] = vlib_get_buffer (vm, buffers[0]);
+      b[1] = vlib_get_buffer (vm, buffers[1]);
+      b[2] = vlib_get_buffer (vm, buffers[2]);
+      b[3] = vlib_get_buffer (vm, buffers[3]);
+
+      or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+      flags_mask_vec512 = u64x8_splat (
+       VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO);
+      if (PREDICT_FALSE (
+           !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512)))
+#else
+      if (PREDICT_FALSE (or_flags &
+                        (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD |
+                         VNET_BUFFER_F_GSO)))
+#endif
+       goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+      vlib_buffer_copy_indices (tb, buffers, 8);
+      iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma);
+
+      buffers += 8;
+      n_packets_left -= 8;
+      n_desc_left -= 8;
+      d += 8;
+      tb += 8;
+#else
+      vlib_buffer_copy_indices (tb, buffers, 4);
+
+      iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+      iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+
+      buffers += 4;
+      n_packets_left -= 4;
+      n_desc_left -= 4;
+      d += 4;
+      tb += 4;
+#endif
+
+      continue;
+
+    one_by_one:
+      tb[0] = buffers[0];
+      b[0] = vlib_get_buffer (vm, buffers[0]);
+      flags = b[0]->flags;
+
+      /* No chained buffers or TSO case */
+      if (PREDICT_TRUE (
+           (flags & (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_GSO)) == 0))
+       {
+         u64 cmd = cmd_eop;
+
+         if (PREDICT_FALSE (flags & VNET_BUFFER_F_OFFLOAD))
+           cmd |= iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+
+         iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+       }
+      else
+       {
+         u16 n_desc_needed = 1;
+         u64 cmd = 0;
+
+         if (flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             vlib_buffer_t *next = vlib_get_buffer (vm, b[0]->next_buffer);
+             n_desc_needed = 2;
+             while (next->flags & VLIB_BUFFER_NEXT_PRESENT)
+               {
+                 next = vlib_get_buffer (vm, next->next_buffer);
+                 n_desc_needed++;
+               }
+           }
+
+         if (flags & VNET_BUFFER_F_GSO)
+           {
+             n_desc_needed++;
+           }
+         else if (PREDICT_FALSE (n_desc_needed > 8))
+           {
+             vlib_buffer_free_one (vm, buffers[0]);
+             vlib_error_count (vm, node->node_index,
+                               IAVF_TX_NODE_CTR_SEG_SZ_EXCEEDED, 1);
+             n_packets_left -= 1;
+             buffers += 1;
+             continue;
+           }
+
+         if (PREDICT_FALSE (n_desc_left < n_desc_needed))
+           break;
+
+         if (flags & VNET_BUFFER_F_GSO)
+           {
+             /* Enqueue a context descriptor */
+             tb[1] = tb[0];
+             tb[0] = iavf_tx_fill_ctx_desc (vm, txq, d, b[0]);
+             n_desc_left -= 1;
+             d += 1;
+             tb += 1;
+             cmd = iavf_tx_prepare_cksum (b[0], 1 /* is_tso */);
+           }
+         else if (flags & VNET_BUFFER_F_OFFLOAD)
+           {
+             cmd = iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+           }
+
+         /* Deal with chain buffer if present */
+         while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+
+             n_desc_left -= 1;
+             d += 1;
+             tb += 1;
+
+             tb[0] = b[0]->next_buffer;
+             b[0] = vlib_get_buffer (vm, b[0]->next_buffer);
+           }
+
+         iavf_tx_fill_data_desc (vm, d, b[0], cmd_eop | cmd, use_va_dma);
+       }
+
+      buffers += 1;
+      n_packets_left -= 1;
+      n_desc_left -= 1;
+      d += 1;
+      tb += 1;
+    }
+
+  *n_enq_descs = n_free_desc - n_desc_left;
+  return n_packets - n_packets_left;
+}
+
+VNET_DEV_NODE_FN (iavf_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+  vnet_dev_tx_queue_t *txq = rt->tx_queue;
+  vnet_dev_port_t *port = txq->port;
+  vnet_dev_t *dev = port->dev;
+  iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+  u16 next;
+  u16 mask = txq->size - 1;
+  u32 *buffers = vlib_frame_vector_args (frame);
+  u16 n_enq, n_left, n_desc, *slot;
+  u16 n_retry = 2;
+
+  n_left = frame->n_vectors;
+
+  vnet_dev_tx_queue_lock_if_needed (txq);
+
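+  /* if the ring is full, reclaim completed descriptors and retry the
+   * remaining packets up to two more times before dropping them */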
+retry:
+  next = atq->next;
+  /* release consumed bufs: descriptors marked with RS are written back by
+     the hw with DTYP 0xF once dma completes; walk the recorded RS slots to
+     find the most recent completed one */
+  if (atq->n_enqueued)
+    {
+      i32 complete_slot = -1;
+      while (1)
+       {
+         u16 *slot = clib_ring_get_first (atq->rs_slots);
+
+         if (slot == 0)
+           break;
+
+         if (iavf_tx_desc_get_dtyp (atq->descs + slot[0]) != 0x0F)
+           break;
+
+         complete_slot = slot[0];
+
+         clib_ring_deq (atq->rs_slots);
+       }
+
+      if (complete_slot >= 0)
+       {
+         u16 first, mask, n_free;
+         mask = txq->size - 1;
+         first = (atq->next - atq->n_enqueued) & mask;
+         n_free = (complete_slot + 1 - first) & mask;
+
+         atq->n_enqueued -= n_free;
+         vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices, first,
+                                             txq->size, n_free);
+       }
+    }
+
+  n_desc = 0;
+  if (dev->va_dma)
+    n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 1);
+  else
+    n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 0);
+
+  if (n_desc)
+    {
+      if (PREDICT_TRUE (next + n_desc <= txq->size))
+       {
+         /* no wrap */
+         iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_desc);
+         vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+                                   n_desc);
+       }
+      else
+       {
+         /* wrap */
+         u32 n_not_wrap = txq->size - next;
+         iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_not_wrap);
+         iavf_tx_copy_desc (atq->descs, atq->tmp_descs + n_not_wrap,
+                            n_desc - n_not_wrap);
+         vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+                                   n_not_wrap);
+         vlib_buffer_copy_indices (atq->buffer_indices,
+                                   atq->tmp_bufs + n_not_wrap,
+                                   n_desc - n_not_wrap);
+       }
+
+      next += n_desc;
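+      /* when an RS slot is available, set the RS (report status) bit on
+        the last descriptor of this batch; its completion is what later
+        lets the enqueued buffers be reclaimed */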
+      if ((slot = clib_ring_enq (atq->rs_slots)))
+       {
+         u16 rs_slot = slot[0] = (next - 1) & mask;
+         atq->descs[rs_slot].qword[1] |= IAVF_TXD_CMD_RS;
+       }
+
+      atq->next = next & mask;
+      __atomic_store_n (atq->qtx_tail, atq->next, __ATOMIC_RELEASE);
+      atq->n_enqueued += n_desc;
+      n_left -= n_enq;
+    }
+
+  if (n_left)
+    {
+      buffers += n_enq;
+
+      if (n_retry--)
+       goto retry;
+
+      vlib_buffer_free (vm, buffers, n_left);
+      vlib_error_count (vm, node->node_index, IAVF_TX_NODE_CTR_NO_FREE_SLOTS,
+                       n_left);
+    }
+
+  vnet_dev_tx_queue_unlock_if_needed (txq);
+
+  return frame->n_vectors - n_left;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.c b/src/plugins/dev_iavf/virtchnl.c
new file mode 100644 (file)
index 0000000..eca4810
--- /dev/null
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+  .class_name = "iavf",
+  .subclass_name = "virtchnl",
+};
+
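+/* formatters used below to pretty-print virtchnl requests, responses and
+   status codes in the debug log */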
+u8 *
+format_virtchnl_op_name (u8 *s, va_list *args)
+{
+  virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+  char *op_names[] = {
+#define _(a, b) [a] = #b,
+    foreach_virtchnl_op
+#undef _
+  };
+
+  if (op >= ARRAY_LEN (op_names) || op_names[op] == 0)
+    return format (s, "UNKNOWN(%u)", op);
+
+  return format (s, "%s", op_names[op]);
+}
+
+u8 *
+format_virtchnl_status (u8 *s, va_list *args)
+{
+  virtchnl_status_t c = va_arg (*args, virtchnl_status_t);
+
+  if (0)
+    ;
+#define _(a, b) else if (c == a) return format (s, #b);
+  foreach_virtchnl_status
+#undef _
+    return format (s, "UNKNOWN(%d)", c);
+}
+
+static u8 *
+format_virtchnl_vlan_support_caps (u8 *s, va_list *args)
+{
+  virtchnl_vlan_support_caps_t v = va_arg (*args, u32);
+  int not_first = 0;
+
+  char *strs[32] = {
+#define _(a, b, c) [a] = c,
+    foreach_virtchnl_vlan_support_bit
+#undef _
+  };
+
+  if (v == VIRTCHNL_VLAN_UNSUPPORTED)
+    return format (s, "unsupported");
+
+  for (int i = 0; i < 32; i++)
+    {
+      if ((v & (1 << i)) == 0)
+       continue;
+      if (not_first)
+       s = format (s, " ");
+      if (strs[i])
+       s = format (s, "%s", strs[i]);
+      else
+       s = format (s, "unknown(%u)", i);
+      not_first = 1;
+    }
+  return s;
+}
+
+static u8 *
+format_virtchnl_op_req (u8 *s, va_list *args)
+{
+  virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+  void *p = va_arg (*args, void *);
+  u32 indent = format_get_indent (s);
+
+  if (p == 0)
+    return format (s, "no data");
+
+  switch (op)
+    {
+    case VIRTCHNL_OP_VERSION:
+      {
+       virtchnl_version_info_t *r = p;
+       s = format (s, "version: %u.%u", r->major, r->minor);
+      }
+      break;
+    case VIRTCHNL_OP_GET_VF_RESOURCES:
+      {
+       u32 *r = p;
+       s = format (s, "%U", format_iavf_vf_cap_flags, *r);
+      }
+      break;
+    case VIRTCHNL_OP_ENABLE_QUEUES:
+    case VIRTCHNL_OP_DISABLE_QUEUES:
+    case VIRTCHNL_OP_GET_STATS:
+      {
+       virtchnl_queue_select_t *r = p;
+       s = format (s, "vsi %u rx 0x%x tx 0x%x", r->vsi_id, r->rx_queues,
+                   r->tx_queues);
+      }
+      break;
+    case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+      {
+       virtchnl_vsi_queue_config_info_t *r = p;
+       s = format (s, "vsi %u num_qp %u", r->vsi_id, r->num_queue_pairs);
+       for (int i = 0; i < r->num_queue_pairs; i++)
+         {
+           virtchnl_rxq_info_t *ri = &r->qpair[i].rxq;
+           virtchnl_txq_info_t *ti = &r->qpair[i].txq;
+
+           s = format (s, "\n%U qpair %u", format_white_space, indent + 2, i);
+           s = format (s,
+                       "\n%U rx vsi %u queue %u dma_ring_addr 0x%lx "
+                       "ring_len %u data_sz %u max_pkt_sz %u",
+                       format_white_space, indent + 4, ri->vsi_id,
+                       ri->queue_id, ri->dma_ring_addr, ri->ring_len,
+                       ri->databuffer_size, ri->max_pkt_size);
+           s = format (
+             s, "\n%U tx vsi %u queue %u dma_ring_addr 0x%lx ring_len %u",
+             format_white_space, indent + 4, ti->vsi_id, ti->queue_id,
+             ti->dma_ring_addr, ti->ring_len);
+         }
+      }
+      break;
+    case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+      {
+       virtchnl_irq_map_info_t *r = p;
+       s = format (s, "num_vectors %u", r->num_vectors);
+       for (int i = 0; i < r->num_vectors; i++)
+         {
+           virtchnl_vector_map_t *vecmap = r->vecmap + i;
+           s = format (s,
+                       "\n%Uvsi %u vector_id %u rxq_map 0x%04x txq_map "
+                       "0x%04x rxitr_idx %u txitr_idx %u",
+                       format_white_space, indent + 2, vecmap->vsi_id,
+                       vecmap->vector_id, vecmap->rxq_map, vecmap->txq_map,
+                       vecmap->rxitr_idx, vecmap->txitr_idx);
+         }
+      }
+      break;
+    case VIRTCHNL_OP_CONFIG_RSS_LUT:
+      {
+       virtchnl_rss_lut_t *r = p;
+       s = format (s, "vsi %u entries %u lut", r->vsi_id, r->lut_entries);
+       for (int i = 0; i < r->lut_entries; i++)
+         s = format (s, " %u", r->lut[i]);
+      }
+      break;
+    case VIRTCHNL_OP_CONFIG_RSS_KEY:
+      {
+       virtchnl_rss_key_t *r = p;
+       s = format (s, "vsi %u len %u key ", r->vsi_id, r->key_len);
+       for (int i = 0; i < r->key_len; i++)
+         s = format (s, "%02x", r->key[i]);
+      }
+      break;
+    case VIRTCHNL_OP_ADD_ETH_ADDR:
+    case VIRTCHNL_OP_DEL_ETH_ADDR:
+      {
+       virtchnl_ether_addr_list_t *r = p;
+       s = format (s, "vsi %u num_elements %u elts: ", r->vsi_id,
+                   r->num_elements);
+       for (int i = 0; i < r->num_elements; i++)
+         s = format (s, "%s%U%s%s", i ? ", " : "", format_ethernet_address,
+                     r->list[i].addr, r->list[i].primary ? " primary" : "",
+                     r->list[i].extra ? " extra" : "");
+      }
+      break;
+    case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+      {
+       virtchnl_promisc_info_t *r = p;
+       s = format (
+         s, "promisc_info: vsi %u flags 0x%x (unicast %s multicast %s)",
+         r->vsi_id, r->flags,
+         r->flags & FLAG_VF_UNICAST_PROMISC ? "on" : "off",
+         r->flags & FLAG_VF_MULTICAST_PROMISC ? "on" : "off");
+      }
+      break;
+    case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+      {
+       virtchnl_vlan_setting_t *r = p;
+       s = format (s,
+                   "vport %u outer_ethertype_setting 0x%x [%U] "
+                   "inner_ethertype_setting 0x%x [%U]",
+                   r->vport_id, r->outer_ethertype_setting,
+                   format_virtchnl_vlan_support_caps,
+                   r->outer_ethertype_setting, r->inner_ethertype_setting,
+                   format_virtchnl_vlan_support_caps,
+                   r->inner_ethertype_setting);
+      }
+      break;
+    default:
+      s = format (s, "unknown op 0x%04x", op);
+      break;
+    };
+  return s;
+}
+
+static u8 *
+format_virtchnl_op_resp (u8 *s, va_list *args)
+{
+  virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+  void *p = va_arg (*args, void *);
+  u32 indent = format_get_indent (s);
+
+  if (p == 0)
+    return format (s, "no data");
+
+  switch (op)
+    {
+    case VIRTCHNL_OP_VERSION:
+      {
+       virtchnl_version_info_t *r = p;
+       s = format (s, "version %u.%u", r->major, r->minor);
+      }
+      break;
+    case VIRTCHNL_OP_GET_VF_RESOURCES:
+      {
+       virtchnl_vf_resource_t *r = p;
+       s =
+         format (s,
+                 "vf_resource: num_vsis %u num_queue_pairs %u "
+                 "max_vectors %u max_mtu %u rss_key_size %u rss_lut_size %u",
+                 r->num_vsis, r->num_queue_pairs, r->max_vectors, r->max_mtu,
+                 r->rss_key_size, r->rss_lut_size);
+       s = format (s, "\n%Uvf_cap_flags 0x%x (%U)", format_white_space,
+                   indent + 2, r->vf_cap_flags, format_iavf_vf_cap_flags,
+                   r->vf_cap_flags);
+       for (int i = 0; i < r->num_vsis; i++)
+         s = format (s,
+                     "\n%Uvsi_resource[%u]: vsi %u num_qp %u vsi_type %u "
+                     "qset_handle %u default_mac_addr %U",
+                     format_white_space, indent + 2, i, r->vsi_res[i].vsi_id,
+                     r->vsi_res[i].num_queue_pairs, r->vsi_res[i].vsi_type,
+                     r->vsi_res[i].qset_handle, format_ethernet_address,
+                     r->vsi_res[i].default_mac_addr);
+      }
+      break;
+    case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+      {
+       virtchnl_vlan_caps_t *r = p;
+       s = format (s, "filtering: ethertype_init 0x%x max_filters %u",
+                   r->filtering.ethertype_init, r->filtering.max_filters);
+       s = format (s, "\n%U outer [%U] inner [%U]", format_white_space,
+                   indent, format_virtchnl_vlan_support_caps,
+                   r->filtering.filtering_support.outer,
+                   format_virtchnl_vlan_support_caps,
+                   r->filtering.filtering_support.inner);
+       s = format (s, "\n%Uoffloads: ethertype_init 0x%x ethertype_match %u",
+                   format_white_space, indent, r->offloads.ethertype_init,
+                   r->offloads.ethertype_match);
+       s = format (s, "\n%U stripping outer [%U] stripping inner [%U]",
+                   format_white_space, indent,
+                   format_virtchnl_vlan_support_caps,
+                   r->offloads.stripping_support.outer,
+                   format_virtchnl_vlan_support_caps,
+                   r->offloads.stripping_support.inner);
+       s = format (s, "\n%U insertion outer [%U] insertion inner [%U]",
+                   format_white_space, indent,
+                   format_virtchnl_vlan_support_caps,
+                   r->offloads.insertion_support.outer,
+                   format_virtchnl_vlan_support_caps,
+                   r->offloads.insertion_support.inner);
+      }
+      break;
+    case VIRTCHNL_OP_GET_STATS:
+      {
+       virtchnl_eth_stats_t *r = p;
+       s = format (s,
+                   "rx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+                   "%lu, discards %lu unknown_protocol %lu",
+                   r->rx_bytes, r->rx_unicast, r->rx_multicast,
+                   r->rx_broadcast, r->rx_discards, r->rx_unknown_protocol);
+       s = format (s, "\n%U", format_white_space, indent);
+       s = format (s,
+                   "tx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+                   "%lu, discards %lu errors %lu",
+                   r->tx_bytes, r->tx_unicast, r->tx_multicast,
+                   r->tx_broadcast, r->tx_discards, r->tx_errors);
+      }
+      break;
+    default:
+      s = format (s, "unknown op 0x%04x", op);
+      break;
+    };
+  return s;
+}
+
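+/*
+ * Send a single virtchnl request to the PF over the admin queue and,
+ * unless no_reply is set, wait for the matching response. PF events
+ * (VIRTCHNL_OP_EVENT) arriving in the meantime are queued on ad->events
+ * and the wait is retried.
+ */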
+vnet_dev_rv_t
+iavf_virtchnl_req (vlib_main_t *vm, vnet_dev_t *dev, iavf_virtchnl_req_t *r)
+{
+  iavf_device_t *ad = vnet_dev_get_data (dev);
+  vnet_dev_rv_t rv;
+  iavf_aq_desc_t *d;
+  u8 *b;
+
+  log_debug (dev, "%U req:\n  %U", format_virtchnl_op_name, r->op,
+            format_virtchnl_op_req, r->op, r->req);
+
+  iavf_aq_desc_t txd = {
+    .opcode = IIAVF_AQ_DESC_OP_SEND_TO_PF,
+    .v_opcode = r->op,
+    .flags = { .si = 1 },
+  };
+
+  rv = iavf_aq_atq_enq (vm, dev, &txd, r->req, r->req_sz, 0.5);
+
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  if (r->no_reply)
+    return VNET_DEV_OK;
+
+retry:
+  if (!iavf_aq_arq_next_acq (vm, dev, &d, &b, 1.0))
+    {
+      log_err (dev, "timeout waiting for virtchnl response");
+      return VNET_DEV_ERR_TIMEOUT;
+    }
+
+  if (d->v_opcode == VIRTCHNL_OP_EVENT)
+    {
+      if ((d->datalen != sizeof (virtchnl_pf_event_t)) ||
+         ((d->flags.buf) == 0))
+       {
+         log_err (dev, "event message error");
+         return VNET_DEV_ERR_BUG;
+       }
+
+      vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+      iavf_aq_arq_next_rel (vm, dev);
+      goto retry;
+    }
+
+  if (d->v_opcode != r->op)
+    {
+      log_err (dev,
+              "unexpected response received [v_opcode = %u, expected %u, "
+              "v_retval %d]",
+              d->v_opcode, r->op, d->v_retval);
+      rv = VNET_DEV_ERR_BUG;
+      goto done;
+    }
+
+  r->status = d->v_retval;
+
+  if (d->v_retval)
+    {
+      log_err (dev, "error [v_opcode = %u, v_retval %d]", d->v_opcode,
+              d->v_retval);
+      rv = VNET_DEV_ERR_BUG;
+      goto done;
+    }
+
+  if (r->resp_sz && d->flags.buf)
+    clib_memcpy_fast (r->resp, b, r->resp_sz);
+
+done:
+  iavf_aq_arq_next_rel (vm, dev);
+  if (rv == VNET_DEV_OK)
+    log_debug (dev, "%U resp:\n  %U", format_virtchnl_op_name, r->op,
+              format_virtchnl_op_resp, r->op, r->resp);
+  return rv;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.h b/src/plugins/dev_iavf/virtchnl.h
new file mode 100644 (file)
index 0000000..d141012
--- /dev/null
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_H_
+#define _IIAVF_VIRTCHNL_H_
+
+#define VIRTCHNL_VERSION_MAJOR 1
+#define VIRTCHNL_VERSION_MINOR 1
+
+#define foreach_iavf_promisc_flags                                            \
+  _ (0, UNICAST_PROMISC, "unicast")                                           \
+  _ (1, MULTICAST_PROMISC, "multicast")
+
+enum
+{
+#define _(a, b, c) FLAG_VF_##b = (1 << a),
+  foreach_iavf_promisc_flags
+#undef _
+};
+
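+/* VF interrupt control, admin queue and queue tail register offsets */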
+#define AVFINT_DYN_CTLN(x) (0x00003800 + (0x4 * (x)))
+#define AVFINT_ICR0        0x00004800
+#define AVFINT_ICR0_ENA1   0x00005000
+#define AVFINT_DYN_CTL0    0x00005C00
+#define IAVF_ARQBAH        0x00006000
+#define IAVF_ATQH          0x00006400
+#define IAVF_ATQLEN        0x00006800
+#define IAVF_ARQBAL        0x00006C00
+#define IAVF_ARQT          0x00007000
+#define IAVF_ARQH          0x00007400
+#define IAVF_ATQBAH        0x00007800
+#define IAVF_ATQBAL        0x00007C00
+#define IAVF_ARQLEN        0x00008000
+#define IAVF_ATQT          0x00008400
+#define VFGEN_RSTAT        0x00008800
+#define IAVF_QTX_TAIL(q)   (0x00000000 + (0x4 * (q)))
+#define IAVF_QRX_TAIL(q)   (0x00002000 + (0x4 * (q)))
+
+#define foreach_virtchnl_op                                                   \
+  _ (0, UNKNOWN)                                                              \
+  _ (1, VERSION)                                                              \
+  _ (2, RESET_VF)                                                             \
+  _ (3, GET_VF_RESOURCES)                                                     \
+  _ (4, CONFIG_TX_QUEUE)                                                      \
+  _ (5, CONFIG_RX_QUEUE)                                                      \
+  _ (6, CONFIG_VSI_QUEUES)                                                    \
+  _ (7, CONFIG_IRQ_MAP)                                                       \
+  _ (8, ENABLE_QUEUES)                                                        \
+  _ (9, DISABLE_QUEUES)                                                       \
+  _ (10, ADD_ETH_ADDR)                                                        \
+  _ (11, DEL_ETH_ADDR)                                                        \
+  _ (12, ADD_VLAN)                                                            \
+  _ (13, DEL_VLAN)                                                            \
+  _ (14, CONFIG_PROMISCUOUS_MODE)                                             \
+  _ (15, GET_STATS)                                                           \
+  _ (16, RSVD)                                                                \
+  _ (17, EVENT)                                                               \
+  _ (18, UNDEF_18)                                                            \
+  _ (19, UNDEF_19)                                                            \
+  _ (20, IWARP)                                                               \
+  _ (21, CONFIG_IWARP_IRQ_MAP)                                                \
+  _ (22, RELEASE_IWARP_IRQ_MAP)                                               \
+  _ (23, CONFIG_RSS_KEY)                                                      \
+  _ (24, CONFIG_RSS_LUT)                                                      \
+  _ (25, GET_RSS_HENA_CAPS)                                                   \
+  _ (26, SET_RSS_HENA)                                                        \
+  _ (27, ENABLE_VLAN_STRIPPING)                                               \
+  _ (28, DISABLE_VLAN_STRIPPING)                                              \
+  _ (29, REQUEST_QUEUES)                                                      \
+  _ (30, ENABLE_CHANNELS)                                                     \
+  _ (31, DISABLE_CHANNELS)                                                    \
+  _ (32, ADD_CLOUD_FILTER)                                                    \
+  _ (33, DEL_CLOUD_FILTER)                                                    \
+  _ (45, ADD_RSS_CFG)                                                         \
+  _ (46, DEL_RSS_CFG)                                                         \
+  _ (47, ADD_FDIR_FILTER)                                                     \
+  _ (48, DEL_FDIR_FILTER)                                                     \
+  _ (49, QUERY_FDIR_FILTER)                                                   \
+  _ (50, GET_MAX_RSS_QREGION)                                                 \
+  _ (51, GET_OFFLOAD_VLAN_V2_CAPS)                                            \
+  _ (52, ADD_VLAN_V2)                                                         \
+  _ (53, DEL_VLAN_V2)                                                         \
+  _ (54, ENABLE_VLAN_STRIPPING_V2)                                            \
+  _ (55, DISABLE_VLAN_STRIPPING_V2)                                           \
+  _ (56, ENABLE_VLAN_INSERTION_V2)                                            \
+  _ (57, DISABLE_VLAN_INSERTION_V2)                                           \
+  _ (58, ENABLE_VLAN_FILTERING_V2)                                            \
+  _ (59, DISABLE_VLAN_FILTERING_V2)                                           \
+  _ (107, ENABLE_QUEUES_V2)                                                   \
+  _ (108, DISABLE_QUEUES_V2)                                                  \
+  _ (111, MAP_QUEUE_VECTOR)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL_OP_##n = v,
+  foreach_virtchnl_op
+#undef _
+    VIRTCHNL_N_OPS,
+} virtchnl_op_t;
+
+#define foreach_virtchnl_status                                               \
+  _ (0, SUCCESS)                                                              \
+  _ (-5, ERR_PARAM)                                                           \
+  _ (-18, ERR_NO_MEMORY)                                                      \
+  _ (-38, ERR_OPCODE_MISMATCH)                                                \
+  _ (-39, ERR_CQP_COMPL_ERROR)                                                \
+  _ (-40, ERR_INVALID_VF_ID)                                                  \
+  _ (-53, ERR_ADMIN_QUEUE_ERROR)                                              \
+  _ (-64, NOT_SUPPORTED)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_STATUS_##b = a,
+  foreach_virtchnl_status
+#undef _
+} virtchnl_status_t;
+
+#define foreach_iavf_vf_cap_flag                                              \
+  _ (0, OFFLOAD_L2, "l2")                                                     \
+  _ (1, OFFLOAD_IWARP, "iwarp")                                               \
+  _ (2, OFFLOAD_RSVD, "rsvd")                                                 \
+  _ (3, OFFLOAD_RSS_AQ, "rss-aq")                                             \
+  _ (4, OFFLOAD_RSS_REG, "rss-reg")                                           \
+  _ (5, OFFLOAD_WB_ON_ITR, "wb-on-itr")                                       \
+  _ (6, OFFLOAD_REQ_QUEUES, "req-queues")                                     \
+  _ (7, CAP_ADV_LINK_SPEED, "adv-link-speed")                                 \
+  _ (9, LARGE_NUM_QPAIRS, "large-num-qpairs")                                 \
+  _ (15, OFFLOAD_VLAN_V2, "vlan-v2")                                          \
+  _ (16, OFFLOAD_VLAN, "vlan")                                                \
+  _ (17, OFFLOAD_RX_POLLING, "rx-polling")                                    \
+  _ (18, OFFLOAD_RSS_PCTYPE_V2, "rss-pctype-v2")                              \
+  _ (19, OFFLOAD_RSS_PF, "rss-pf")                                            \
+  _ (20, OFFLOAD_ENCAP, "encap")                                              \
+  _ (21, OFFLOAD_ENCAP_CSUM, "encap-csum")                                    \
+  _ (22, OFFLOAD_RX_ENCAP_CSUM, "rx-encap-csum")                              \
+  _ (23, OFFLOAD_ADQ, "offload-adq")                                          \
+  _ (24, OFFLOAD_ADQ_v2, "offload-adq-v2")                                    \
+  _ (25, OFFLOAD_USO, "offload-uso")                                          \
+  _ (26, OFFLOAD_RX_FLEX_DESC, "offload-rx-flex-desc")                        \
+  _ (27, OFFLOAD_ADV_RSS_PF, "offload-adv-rss-pf")                            \
+  _ (28, OFFLOAD_FDIR_PF, "offload-fdir-pf")                                  \
+  _ (30, CAP_DCF, "dcf")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL_VF_##b = (1 << a),
+  foreach_iavf_vf_cap_flag
+#undef _
+} iavf_vf_cap_flag_t;
+
+typedef enum
+{
+  VIRTCHNL_VSI_TYPE_INVALID = 0,
+  VIRTCHNL_VSI_SRIOV = 6,
+} virtchnl_vsi_type_t;
+
+typedef enum
+{
+  VIRTCHNL_VFR_INPROGRESS = 0,
+  VIRTCHNL_VFR_COMPLETED,
+  VIRTCHNL_VFR_VFACTIVE,
+} virtchnl_vfr_states_t;
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 num_queue_pairs;
+  virtchnl_vsi_type_t vsi_type;
+  u16 qset_handle;
+  u8 default_mac_addr[6];
+} virtchnl_vsi_resource_t;
+
+typedef struct
+{
+  u16 num_vsis;
+  u16 num_queue_pairs;
+  u16 max_vectors;
+  u16 max_mtu;
+  u32 vf_cap_flags;
+  u32 rss_key_size;
+  u32 rss_lut_size;
+  virtchnl_vsi_resource_t vsi_res[1];
+} virtchnl_vf_resource_t;
+
+#define foreach_virtchnl_event_code                                           \
+  _ (0, UNKNOWN)                                                              \
+  _ (1, LINK_CHANGE)                                                          \
+  _ (2, RESET_IMPENDING)                                                      \
+  _ (3, PF_DRIVER_CLOSE)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_EVENT_##b = (a),
+  foreach_virtchnl_event_code
+#undef _
+} virtchnl_event_codes_t;
+
+#define foreach_virtchnl_link_speed                                           \
+  _ (0, 2_5GB, "2.5 Gbps")                                                    \
+  _ (1, 100MB, "100 Mbps")                                                    \
+  _ (2, 1GB, "1 Gbps")                                                        \
+  _ (3, 10GB, "10 Gbps")                                                      \
+  _ (4, 40GB, "40 Gbps")                                                      \
+  _ (5, 20GB, "20 Gbps")                                                      \
+  _ (6, 25GB, "25 Gbps")                                                      \
+  _ (7, 5GB, "5 Gbps")
+
+typedef enum
+{
+  VIRTCHNL_LINK_SPEED_UNKNOWN = 0,
+#define _(a, b, c) VIRTCHNL_LINK_SPEED_##b = (1 << a),
+  foreach_virtchnl_link_speed
+#undef _
+} virtchnl_link_speed_t;
+
+typedef struct
+{
+  virtchnl_event_codes_t event;
+  union
+  {
+    struct
+    {
+      virtchnl_link_speed_t link_speed;
+      u8 link_status;
+    } link_event;
+    struct
+    {
+      u32 link_speed;
+      u8 link_status;
+    } link_event_adv;
+  } event_data;
+  int severity;
+} virtchnl_pf_event_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_pf_event_t, 16);
+
+typedef struct
+{
+  u32 major;
+  u32 minor;
+} virtchnl_version_info_t;
+
+#define foreach_iavf_aq_desc_flag                                             \
+  _ (1, dd)                                                                   \
+  _ (1, cmp)                                                                  \
+  _ (1, err)                                                                  \
+  _ (1, vfe)                                                                  \
+  _ (5, reserved)                                                             \
+  _ (1, lb)                                                                   \
+  _ (1, rd)                                                                   \
+  _ (1, vfc)                                                                  \
+  _ (1, buf)                                                                  \
+  _ (1, si)                                                                   \
+  _ (1, ie)                                                                   \
+  _ (1, fe)
+
+typedef union
+{
+  struct
+  {
+#define _(n, s) u16 s : n;
+    foreach_iavf_aq_desc_flag
+#undef _
+  };
+  u16 as_u16;
+} iavf_aq_desc_flags_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_flags_t, 2);
+
+typedef enum
+{
+  IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN = 0x0003,
+  IIAVF_AQ_DESC_OP_SEND_TO_PF = 0x0801,
+  IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF = 0x0802,
+} __clib_packed iavf_aq_desc_op_t;
+
+#define foreach_iavf_aq_desc_retval                                           \
+  _ (0, OK)                                                                   \
+  _ (1, EPERM)                                                                \
+  _ (2, ENOENT)                                                               \
+  _ (3, ESRCH)                                                                \
+  _ (4, EINTR)                                                                \
+  _ (5, EIO)                                                                  \
+  _ (6, ENXIO)                                                                \
+  _ (7, E2BIG)                                                                \
+  _ (8, EAGAIN)                                                               \
+  _ (9, ENOMEM)                                                               \
+  _ (10, EACCES)                                                              \
+  _ (11, EFAULT)                                                              \
+  _ (12, EBUSY)                                                               \
+  _ (13, EEXIST)                                                              \
+  _ (14, EINVAL)                                                              \
+  _ (15, ENOTTY)                                                              \
+  _ (16, ENOSPC)                                                              \
+  _ (17, ENOSYS)                                                              \
+  _ (18, ERANGE)                                                              \
+  _ (19, EFLUSHED)                                                            \
+  _ (20, BAD_ADDR)                                                            \
+  _ (21, EMODE)                                                               \
+  _ (22, EFBIG)                                                               \
+  _ (23, ESBCOMP)                                                             \
+  _ (24, ENOSEC)                                                              \
+  _ (25, EBADSIG)                                                             \
+  _ (26, ESVN)                                                                \
+  _ (27, EBADMAN)                                                             \
+  _ (28, EBADBUF)                                                             \
+  _ (29, EACCES_BMCU)
+
+typedef enum
+{
+#define _(a, b) IIAVF_AQ_DESC_RETVAL_##b = a,
+  foreach_iavf_aq_desc_retval
+#undef _
+} __clib_packed iavf_aq_desc_retval_t;
+
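+/* 32-byte admin queue descriptor; for virtchnl messages the cookie
+   fields carry the virtchnl opcode and return status, and addr_hi/addr_lo
+   hold the DMA address of the optional indirect buffer */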
+typedef struct
+{
+  iavf_aq_desc_flags_t flags;
+  iavf_aq_desc_op_t opcode;
+  u16 datalen;
+  u16 retval;
+  union
+  {
+    u32 cookie_hi;
+    virtchnl_op_t v_opcode;
+  };
+  union
+  {
+    u32 cookie_lo;
+    virtchnl_status_t v_retval;
+  };
+  union
+  {
+    u8 driver_unloading : 1;
+    u32 param0;
+  };
+  u32 param1;
+  union
+  {
+    u32 param2;
+    u32 addr_hi;
+  };
+  union
+  {
+    u32 param3;
+    u32 addr_lo;
+  };
+} iavf_aq_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_t, 32);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 queue_id;
+  u16 ring_len;
+  u64 dma_ring_addr;
+  u64 dma_headwb_addr;
+} virtchnl_txq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_txq_info_t, 24);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 queue_id;
+  u32 ring_len;
+  u16 hdr_size;
+  u16 splithdr_enabled;
+  u32 databuffer_size;
+  u32 max_pkt_size;
+  u8 crc_disable;
+  u8 rxdid;
+  u8 pad[2];
+  u64 dma_ring_addr;
+  i32 rx_split_pos;
+  u32 pad2;
+} virtchnl_rxq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rxq_info_t, 40);
+
+typedef struct
+{
+  virtchnl_txq_info_t txq;
+  virtchnl_rxq_info_t rxq;
+} virtchnl_queue_pair_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_pair_info_t, 64);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 num_queue_pairs;
+  u32 pad;
+  virtchnl_queue_pair_info_t qpair[1];
+} virtchnl_vsi_queue_config_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_vsi_queue_config_info_t, 72);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 pad;
+  u32 rx_queues;
+  u32 tx_queues;
+} virtchnl_queue_select_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_select_t, 12);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 vector_id;
+  u16 rxq_map;
+  u16 txq_map;
+  u16 rxitr_idx;
+  u16 txitr_idx;
+} virtchnl_vector_map_t;
+
+typedef struct
+{
+  u16 num_vectors;
+  virtchnl_vector_map_t vecmap[1];
+} virtchnl_irq_map_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_irq_map_info_t, 14);
+
+typedef struct
+{
+  u8 addr[6];
+  union
+  {
+    struct
+    {
+      u8 primary : 1;
+      u8 extra : 1;
+    };
+    u8 type;
+  };
+  u8 pad[1];
+} virtchnl_ether_addr_t;
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 num_elements;
+  virtchnl_ether_addr_t list[1];
+} virtchnl_ether_addr_list_t;
+
+#define foreach_virtchnl_eth_stats                                            \
+  _ (rx_bytes)                                                                \
+  _ (rx_unicast)                                                              \
+  _ (rx_multicast)                                                            \
+  _ (rx_broadcast)                                                            \
+  _ (rx_discards)                                                             \
+  _ (rx_unknown_protocol)                                                     \
+  _ (tx_bytes)                                                                \
+  _ (tx_unicast)                                                              \
+  _ (tx_multicast)                                                            \
+  _ (tx_broadcast)                                                            \
+  _ (tx_discards)                                                             \
+  _ (tx_errors)
+
+typedef struct
+{
+#define _(s) u64 s;
+  foreach_virtchnl_eth_stats
+#undef _
+} virtchnl_eth_stats_t;
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 key_len;
+  u8 key[1];
+} virtchnl_rss_key_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_key_t, 6);
+
+typedef struct
+{
+  u16 vsi_id;
+  u16 lut_entries;
+  u8 lut[1];
+} virtchnl_rss_lut_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_lut_t, 6);
+
+/* VIRTCHNL_OP_REQUEST_QUEUES */
+typedef struct
+{
+  u16 num_queue_pairs;
+} virtchnl_vf_res_request_t;
+
+typedef struct
+{
+  u32 outer;
+  u32 inner;
+} virtchnl_vlan_supported_caps_t;
+
+typedef struct
+{
+  virtchnl_vlan_supported_caps_t filtering_support;
+  u32 ethertype_init;
+  u16 max_filters;
+  u8 pad[2];
+} virtchnl_vlan_filtering_caps_t;
+
+typedef struct virtchnl_vlan_offload_caps
+{
+  virtchnl_vlan_supported_caps_t stripping_support;
+  virtchnl_vlan_supported_caps_t insertion_support;
+  u32 ethertype_init;
+  u8 ethertype_match;
+  u8 pad[3];
+} virtchnl_vlan_offload_caps_t;
+
+typedef struct
+{
+  virtchnl_vlan_filtering_caps_t filtering;
+  virtchnl_vlan_offload_caps_t offloads;
+} virtchnl_vlan_caps_t;
+
+#define foreach_virtchnl_vlan_support_bit                                     \
+  _ (0, ETHERTYPE_8100, "dot1Q")                                              \
+  _ (1, ETHERTYPE_88A8, "dot1AD")                                             \
+  _ (2, ETHERTYPE_9100, "QinQ")                                               \
+  _ (8, TAG_LOCATION_L2TAG1, "l2tag1")                                        \
+  _ (9, TAG_LOCATION_L2TAG2, "l2tag2")                                        \
+  _ (10, TAG_LOCATION_L2TAG2_2, "l2tag2_2")                                   \
+  _ (24, PRIO, "prio")                                                        \
+  _ (28, FILTER_MASK, "filter-mask")                                          \
+  _ (29, ETHERTYPE_AND, "etype-and")                                          \
+  _ (30, ETHERTYPE_XOR, "etype-xor")                                          \
+  _ (31, TOGGLE, "toggle")
+
+typedef enum
+{
+  VIRTCHNL_VLAN_UNSUPPORTED = 0,
+#define _(a, b, c) VIRTCHNL_VLAN_##b = (1 << a),
+  foreach_virtchnl_vlan_support_bit
+#undef _
+} virtchnl_vlan_support_caps_t;
+
+typedef struct
+{
+  u32 outer_ethertype_setting;
+  u32 inner_ethertype_setting;
+  u16 vport_id;
+  u8 pad[6];
+} virtchnl_vlan_setting_t;
+
+typedef struct
+{
+  u16 vsi_id;
+  union
+  {
+    struct
+    {
+      u16 unicast_promisc : 1;
+      u16 multicast_promisc : 1;
+    };
+    u16 flags;
+  };
+} virtchnl_promisc_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_promisc_info_t, 4);
+
+#endif /* _IIAVF_VIRTCHNL_H_ */
diff --git a/src/plugins/dev_iavf/virtchnl_funcs.h b/src/plugins/dev_iavf/virtchnl_funcs.h
new file mode 100644 (file)
index 0000000..e7f3901
--- /dev/null
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_FUNCS_H_
+#define _IIAVF_VIRTCHNL_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf.h>
+
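+/* byte size of a variable-length virtchnl message whose trailing
+   one-element array member e actually carries n entries */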
+#define VIRTCHNL_MSG_SZ(s, e, n) STRUCT_OFFSET_OF (s, e[n])
+
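+/* all parameters for a single virtchnl transaction; req/resp point to
+   op-specific payloads and status receives the PF return code */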
+typedef struct
+{
+  virtchnl_op_t op;
+  u8 no_reply : 1;
+  u16 req_sz;
+  u16 resp_sz;
+  virtchnl_status_t status;
+  const void *req;
+  void *resp;
+} iavf_virtchnl_req_t;
+
+vnet_dev_rv_t iavf_virtchnl_req (vlib_main_t *, vnet_dev_t *,
+                                iavf_virtchnl_req_t *);
+
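+/* Typed wrappers, one per virtchnl op used by the driver. A hypothetical
+   call site (sketch only, names are illustrative):
+
+     virtchnl_version_info_t req = { .major = VIRTCHNL_VERSION_MAJOR,
+                                     .minor = VIRTCHNL_VERSION_MINOR };
+     virtchnl_version_info_t resp;
+     vnet_dev_rv_t rv = iavf_vc_op_version (vm, dev, &req, &resp);
+*/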
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_version (vlib_main_t *vm, vnet_dev_t *dev,
+                   const virtchnl_version_info_t *req,
+                   virtchnl_version_info_t *resp)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_VERSION,
+    .req = req,
+    .req_sz = sizeof (*req),
+    .resp = resp,
+    .resp_sz = sizeof (*resp),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_reset_vf (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_RESET_VF,
+    .no_reply = 1,
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_vf_resources (vlib_main_t *vm, vnet_dev_t *dev, const u32 *req,
+                            virtchnl_vf_resource_t *resp)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_GET_VF_RESOURCES,
+    .req = req,
+    .req_sz = sizeof (*req),
+    .resp = resp,
+    .resp_sz = sizeof (*resp),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_enable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+                         const virtchnl_queue_select_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_ENABLE_QUEUES,
+    .req = req,
+    .req_sz = sizeof (*req),
+  };
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+                          const virtchnl_queue_select_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_DISABLE_QUEUES,
+    .req = req,
+    .req_sz = sizeof (*req),
+  };
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_vsi_queues (vlib_main_t *vm, vnet_dev_t *dev,
+                             const virtchnl_vsi_queue_config_info_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+    .req = req,
+    .req_sz = VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+                              req->num_queue_pairs),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_irq_map (vlib_main_t *vm, vnet_dev_t *dev,
+                          const virtchnl_irq_map_info_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_CONFIG_IRQ_MAP,
+    .req = req,
+    .req_sz =
+      VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, req->num_vectors),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_lut (vlib_main_t *vm, vnet_dev_t *dev,
+                          const virtchnl_rss_lut_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_CONFIG_RSS_LUT,
+    .req = req,
+    .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, req->lut_entries),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_key (vlib_main_t *vm, vnet_dev_t *dev,
+                          const virtchnl_rss_key_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_CONFIG_RSS_KEY,
+    .req = req,
+    .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, req->key_len),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_t *dev,
+                               const virtchnl_promisc_info_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+    .req = req,
+    .req_sz = sizeof (*req),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_add_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+                        const virtchnl_ether_addr_list_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_ADD_ETH_ADDR,
+    .req = req,
+    .req_sz =
+      VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_del_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+                        const virtchnl_ether_addr_list_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_DEL_ETH_ADDR,
+    .req = req,
+    .req_sz =
+      VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_offload_vlan_v2_caps (vlib_main_t *vm, vnet_dev_t *dev,
+                                    virtchnl_vlan_caps_t *resp)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+    .resp = resp,
+    .resp_sz = sizeof (*resp),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_stats (vlib_main_t *vm, vnet_dev_t *dev,
+                     const virtchnl_queue_select_t *req,
+                     virtchnl_eth_stats_t *resp)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_GET_STATS,
+    .req = req,
+    .req_sz = sizeof (*req),
+    .resp = resp,
+    .resp_sz = sizeof (*resp),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping_v2 (vlib_main_t *vm, vnet_dev_t *dev,
+                                     const virtchnl_vlan_setting_t *req)
+{
+  iavf_virtchnl_req_t vr = {
+    .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+    .req = req,
+    .req_sz = sizeof (*req),
+  };
+
+  return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+#endif /* _IIAVF_VIRTCHNL_FUNCS_H_ */
index 8527fdb..ae25e40 100644 (file)
@@ -32,20 +32,25 @@ clib_ring_header (void *v)
   return vec_header (v);
 }
 
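+/* reset an existing ring to empty without freeing or resizing it */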
+always_inline void
+clib_ring_reset (void *v)
+{
+  clib_ring_header_t *h = clib_ring_header (v);
+  h->next = 0;
+  h->n_enq = 0;
+}
+
 always_inline void
 clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align)
 {
   void *v;
-  clib_ring_header_t *h;
   vec_attr_t va = { .elt_sz = elt_bytes,
                    .hdr_sz = sizeof (clib_ring_header_t),
                    .align = align };
 
   v = _vec_alloc_internal (size, &va);
 
-  h = clib_ring_header (v);
-  h->next = 0;
-  h->n_enq = 0;
+  clib_ring_reset (v);
   p[0] = v;
 }