ena: Amazon Elastic Network Adapter (ENA) native driver 19/38819/42
author Damjan Marion <damarion@cisco.com>
Mon, 13 Nov 2023 12:18:24 +0000 (12:18 +0000)
committer Damjan Marion <damarion@cisco.com>
Mon, 13 Nov 2023 19:05:17 +0000 (19:05 +0000)
Type: feature
Change-Id: Icd9de05f2cbac0e5a6dfb1f1414f21dc4b893104
Signed-off-by: Damjan Marion <damarion@cisco.com>
22 files changed:
MAINTAINERS
docs/spelling_wordlist.txt
src/plugins/dev_ena/CMakeLists.txt [new file with mode: 0644]
src/plugins/dev_ena/aenq.c [new file with mode: 0644]
src/plugins/dev_ena/aq.c [new file with mode: 0644]
src/plugins/dev_ena/ena.c [new file with mode: 0644]
src/plugins/dev_ena/ena.h [new file with mode: 0644]
src/plugins/dev_ena/ena_admin_defs.h [new file with mode: 0644]
src/plugins/dev_ena/ena_aenq_defs.h [new file with mode: 0644]
src/plugins/dev_ena/ena_defs.h [new file with mode: 0644]
src/plugins/dev_ena/ena_inlines.h [new file with mode: 0644]
src/plugins/dev_ena/ena_io_defs.h [new file with mode: 0644]
src/plugins/dev_ena/ena_reg_defs.h [new file with mode: 0644]
src/plugins/dev_ena/format.c [new file with mode: 0644]
src/plugins/dev_ena/format_aq.c [new file with mode: 0644]
src/plugins/dev_ena/port.c [new file with mode: 0644]
src/plugins/dev_ena/queue.c [new file with mode: 0644]
src/plugins/dev_ena/reg.c [new file with mode: 0644]
src/plugins/dev_ena/rx_node.c [new file with mode: 0644]
src/plugins/dev_ena/tx_node.c [new file with mode: 0644]
src/vlib/buffer.h
src/vppinfra/vector_avx2.h

index 2abc3d7..1ed8378 100644
@@ -415,6 +415,11 @@ I: iavf
 M:     Damjan Marion <damarion@cisco.com>
 F:     src/plugins/dev_iavf/
 
+Plugin - Amazon Elastic Network Adapter (ENA) device driver
+I:     ena
+M:     Damjan Marion <damarion@cisco.com>
+F:     src/plugins/dev_ena/
+
 Plugin - Dispatch Trace PCAP
 I:     dispatch-trace
 M:     Dave Barach <vpp@barachs.net>
index 7fec295..1aa5249 100644
@@ -314,6 +314,7 @@ elts
 emacs
 emerg
 emphasise
+ena
 enablement
 encap
 encap
diff --git a/src/plugins/dev_ena/CMakeLists.txt b/src/plugins/dev_ena/CMakeLists.txt
new file mode 100644
index 0000000..d9224d6
--- /dev/null
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_ena
+  SOURCES
+  aq.c
+  aenq.c
+  ena.c
+  format.c
+  format_aq.c
+  port.c
+  queue.c
+  rx_node.c
+  tx_node.c
+  reg.c
+
+  MULTIARCH_SOURCES
+  rx_node.c
+  tx_node.c
+)
+
diff --git a/src/plugins/dev_ena/aenq.c b/src/plugins/dev_ena/aenq.c
new file mode 100644
index 0000000..64be3c4
--- /dev/null
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_AENQ_POLL_INTERVAL 0.2
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "aenq",
+};
+
+void
+ena_aenq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+
+  log_debug (dev, "");
+
+  ASSERT (ed->aenq_started == 0);
+
+  vnet_dev_dma_mem_free (vm, dev, ed->aenq.entries);
+  ed->aenq.entries = 0;
+  ed->aenq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aenq_alloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+  vnet_dev_rv_t rv;
+
+  log_debug (dev, "");
+
+  ASSERT (ed->aenq.entries == 0);
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, alloc_sz, 0,
+                                   (void **) &ed->aenq.entries)))
+    goto err;
+
+  ed->aenq.depth = depth;
+
+  return VNET_DEV_OK;
+err:
+  ena_aenq_free (vm, dev);
+  return rv;
+}
+
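+/* The AENQ is a phase-tagged ring: the expected phase bit flips on every
+ * ring wrap, so an entry is valid only while its phase bit matches the
+ * phase derived from the head counter. */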
+static ena_aenq_entry_t *
+ena_get_next_aenq_entry (vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u16 index = ed->aenq.head & pow2_mask (ENA_ASYNC_QUEUE_LOG2_DEPTH);
+  u16 phase = 1 & (ed->aenq.head >> ENA_ASYNC_QUEUE_LOG2_DEPTH);
+  ena_aenq_entry_t *e = ed->aenq.entries + index;
+
+  if (e->phase != phase)
+    return 0;
+
+  ed->aenq.head++;
+
+  return e;
+}
+
+static void
+ena_aenq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_aenq_entry_t *ae;
+
+  while ((ae = ena_get_next_aenq_entry (dev)))
+    {
+      ena_device_t *ed = vnet_dev_get_data (dev);
+      vnet_dev_port_state_changes_t changes = {};
+
+      log_debug (dev, "aenq: group %u syndrome %u phase %u timestamp %lu",
+                ae->group, ae->syndrome, ae->phase, ae->timestamp);
+
+      switch (ae->group)
+       {
+       case ENA_AENQ_GROUP_LINK_CHANGE:
+         log_debug (dev, "link_change: status %u",
+                    ae->link_change.link_status);
+         changes.change.link_state = 1;
+         changes.link_state = ae->link_change.link_status != 0;
+         foreach_vnet_dev_port (p, dev)
+           vnet_dev_port_state_change (vm, p, changes);
+         break;
+
+       case ENA_AENQ_GROUP_NOTIFICATION:
+         log_warn (dev, "unhandled AENQ notification received [syndrome %u]",
+                   ae->syndrome);
+         break;
+
+       case ENA_AENQ_GROUP_KEEP_ALIVE:
+         if (ae->keep_alive.rx_drops || ae->keep_alive.tx_drops)
+           log_debug (dev, "keep_alive: rx_drops %lu tx_drops %lu",
+                      ae->keep_alive.rx_drops, ae->keep_alive.tx_drops);
+         ed->aenq.rx_drops = ae->keep_alive.rx_drops - ed->aenq.rx_drops0;
+         ed->aenq.tx_drops = ae->keep_alive.tx_drops - ed->aenq.tx_drops0;
+         ed->aenq.last_keepalive = vlib_time_now (vm);
+         break;
+
+       default:
+         log_debug (dev, "unknown aenq entry (group %u) %U", ae->group,
+                    format_hexdump, ae, sizeof (*ae));
+       };
+    }
+}
+
+vnet_dev_rv_t
+ena_aenq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u16 depth = ed->aenq.depth;
+  u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+
+  ASSERT (ed->aenq_started == 0);
+  ASSERT (ed->aq_started == 1);
+
+  ena_reg_aenq_caps_t aenq_caps = {
+    .depth = depth,
+    .entry_size = sizeof (ena_aenq_entry_t),
+  };
+
+  if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG))
+    {
+      ena_aq_feat_aenq_config_t aenq;
+      vnet_dev_rv_t rv;
+
+      if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+                                   &aenq)))
+       {
+         log_err (dev, "aenq_start: get_feature(AENQ_CONFIG) failed");
+         return rv;
+       }
+
+      aenq.enabled_groups.link_change = 1;
+      aenq.enabled_groups.fatal_error = 1;
+      aenq.enabled_groups.warning = 1;
+      aenq.enabled_groups.notification = 1;
+      aenq.enabled_groups.keep_alive = 1;
+      aenq.enabled_groups.as_u32 &= aenq.supported_groups.as_u32;
+      aenq.supported_groups.as_u32 = 0;
+
+      if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+                                   &aenq)))
+       {
+         log_err (dev, "aenq_start: set_feature(AENQ_CONFIG) failed");
+         return rv;
+       }
+    }
+
+  clib_memset (ed->aenq.entries, 0, alloc_sz);
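+  /* head starts at depth, so the expected phase bit is 1 for the first
+   * pass over the freshly zeroed ring */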
+  ed->aenq.head = depth;
+
+  ena_reg_set_dma_addr (vm, dev, ENA_REG_AENQ_BASE_LO, ENA_REG_AENQ_BASE_HI,
+                       ed->aenq.entries);
+
+  ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+  ena_reg_write (dev, ENA_REG_AENQ_HEAD_DB, &(u32){ depth });
+
+  ed->aenq_started = 1;
+
+  vnet_dev_poll_dev_add (vm, dev, ENA_AENQ_POLL_INTERVAL, ena_aenq_poll);
+
+  return VNET_DEV_OK;
+}
+
+void
+ena_aenq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  if (ed->aenq_started == 1)
+    {
+      ena_reg_aenq_caps_t aenq_caps = {};
+      vnet_dev_poll_dev_remove (vm, dev, ena_aenq_poll);
+      ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+      ed->aenq_started = 0;
+    }
+}
diff --git a/src/plugins/dev_ena/aq.c b/src/plugins/dev_ena/aq.c
new file mode 100644
index 0000000..290d5bd
--- /dev/null
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "admin",
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_stats_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "admin-stats",
+};
+
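+/* per-feature metadata generated from foreach_ena_aq_feature_id: name,
+ * descriptor version, payload size and get/set support */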
+ena_aq_feat_info_t feat_info[] = {
+#define _(v, ver, gt, st, n, s)                                               \
+  [v] = { .name = #n,                                                         \
+         .version = (ver),                                                   \
+         .data_sz = sizeof (s),                                              \
+         .get = (gt),                                                        \
+         .set = (st) },
+  foreach_ena_aq_feature_id
+#undef _
+};
+
+ena_aq_feat_info_t *
+ena_aq_get_feat_info (ena_aq_feature_id_t id)
+{
+  if (id >= ARRAY_LEN (feat_info) || feat_info[id].data_sz == 0)
+    return 0;
+
+  return feat_info + id;
+}
+
+void
+ena_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries);
+  vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries);
+  ed->aq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aq_alloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+  u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+  vnet_dev_rv_t rv;
+
+  ASSERT (ed->aq.sq_entries == 0);
+  ASSERT (ed->aq.cq_entries == 0);
+
+  rv = vnet_dev_dma_mem_alloc (vm, dev, sq_alloc_sz, 0,
+                              (void **) &ed->aq.sq_entries);
+  if (rv != VNET_DEV_OK)
+    goto err;
+
+  rv = vnet_dev_dma_mem_alloc (vm, dev, cq_alloc_sz, 0,
+                              (void **) &ed->aq.cq_entries);
+  if (rv != VNET_DEV_OK)
+    goto err;
+
+  ed->aq.depth = depth;
+
+  return VNET_DEV_OK;
+err:
+  ena_aq_free (vm, dev);
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u16 depth = ed->aq.depth;
+  u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+  u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+
+  ASSERT (ed->aq_started == 0);
+
+  ena_reg_aq_caps_t aq_caps = {
+    .depth = depth,
+    .entry_size = sizeof (ena_aq_sq_entry_t),
+  };
+
+  ena_reg_acq_caps_t acq_caps = {
+    .depth = depth,
+    .entry_size = sizeof (ena_aq_cq_entry_t),
+  };
+
+  clib_memset (ed->aq.sq_entries, 0, sq_alloc_sz);
+  clib_memset (ed->aq.cq_entries, 0, cq_alloc_sz);
+
+  ed->aq.sq_next = 0;
+  ed->aq.cq_head = 0;
+
+  ena_reg_set_dma_addr (vm, dev, ENA_REG_AQ_BASE_LO, ENA_REG_AQ_BASE_HI,
+                       ed->aq.sq_entries);
+  ena_reg_set_dma_addr (vm, dev, ENA_REG_ACQ_BASE_LO, ENA_REG_ACQ_BASE_HI,
+                       ed->aq.cq_entries);
+
+  ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+  ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+
+  ed->aq_started = 1;
+
+  return VNET_DEV_OK;
+}
+
+void
+ena_aq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  ena_reg_aq_caps_t aq_caps = {};
+  ena_reg_acq_caps_t acq_caps = {};
+
+  if (ed->aq_started)
+    {
+      ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+      ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+      ed->aq_started = 0;
+    }
+}
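+
+/* Submit a single admin command: copy the caller's descriptor into the SQ,
+ * ring the AQ doorbell, then poll the paired CQ entry's phase bit with
+ * exponential backoff (1 us, doubling, up to 1 ms) before timing out. */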
+vnet_dev_rv_t
+ena_aq_req (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_opcode_t opcode,
+           void *sqe_data, u8 sqe_data_sz, void *cqe_data, u8 cqe_data_sz)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u32 next = ed->aq.sq_next++;
+  u32 index = next & pow2_mask (ENA_ADMIN_QUEUE_LOG2_DEPTH);
+  u8 phase = 1 & (~(next >> ENA_ADMIN_QUEUE_LOG2_DEPTH));
+  ena_aq_sq_entry_t *sqe = ed->aq.sq_entries + index;
+  ena_aq_cq_entry_t *cqe = ed->aq.cq_entries + index;
+  f64 suspend_time = 1e-6;
+
+  clib_memcpy_fast (&sqe->data, sqe_data, sqe_data_sz);
+  sqe->opcode = opcode;
+  sqe->command_id = index;
+  sqe->phase = phase;
+
+  ena_reg_write (dev, ENA_REG_AQ_DB, &(u32){ ed->aq.sq_next });
+
+  while (cqe->phase != phase)
+    {
+      vlib_process_suspend (vm, suspend_time);
+      suspend_time *= 2;
+      if (suspend_time > 1e-3)
+       {
+         log_err (dev, "admin queue timeout (opcode %U)",
+                  format_ena_aq_opcode, opcode);
+         return VNET_DEV_ERR_TIMEOUT;
+       }
+    }
+
+  if (cqe->status != ENA_ADMIN_COMPL_STATUS_SUCCESS)
+    {
+      log_err (dev,
+              "cqe[%u]: opcode %U status %U ext_status %u sq_head_idx %u",
+              cqe - ed->aq.cq_entries, format_ena_aq_opcode, opcode,
+              format_ena_aq_status, cqe->status, cqe->extended_status,
+              cqe->sq_head_indx);
+      return VNET_DEV_ERR_DEVICE_NO_REPLY;
+    }
+
+  log_debug (dev, "cqe: status %u ext_status %u sq_head_idx %u", cqe->status,
+            cqe->extended_status, cqe->sq_head_indx);
+
+  if (cqe_data && cqe_data_sz)
+    clib_memcpy_fast (cqe_data, &cqe->data, cqe_data_sz);
+  return VNET_DEV_OK;
+}
+
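+/* get_feature/set_feature payloads travel inline in the 60-byte SQ entry
+ * data area: 12 bytes of control buffer info, a 4-byte common descriptor
+ * and up to 44 bytes of feature data. */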
+vnet_dev_rv_t
+ena_aq_set_feature (vlib_main_t *vm, vnet_dev_t *dev,
+                   ena_aq_feature_id_t feat_id, void *data)
+{
+  vnet_dev_rv_t rv;
+
+  struct
+  {
+    ena_aq_aq_ctrl_buff_info_t control_buffer;
+    ena_aq_get_set_feature_common_desc_t feat_common;
+    u32 data[11];
+  } fd = {
+    .feat_common.feature_id = feat_id,
+    .feat_common.feature_version = feat_info[feat_id].version,
+  };
+
+  log_debug (dev, "set_feature(%s):\n  %U", feat_info[feat_id].name,
+            format_ena_aq_feat_desc, feat_id, data);
+
+  ASSERT (feat_info[feat_id].data_sz > 1);
+  clib_memcpy (&fd.data, data, feat_info[feat_id].data_sz);
+
+  rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_SET_FEATURE, &fd, sizeof (fd), 0, 0);
+
+  if (rv != VNET_DEV_OK)
+    log_err (dev, "set_feature(%U) failed", format_ena_aq_feat_name, feat_id);
+
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_get_feature (vlib_main_t *vm, vnet_dev_t *dev,
+                   ena_aq_feature_id_t feat_id, void *data)
+{
+  vnet_dev_rv_t rv;
+
+  struct
+  {
+    ena_aq_aq_ctrl_buff_info_t control_buffer;
+    ena_aq_get_set_feature_common_desc_t feat_common;
+    u32 data[11];
+  } fd = {
+    .feat_common.feature_id = feat_id,
+    .feat_common.feature_version = feat_info[feat_id].version,
+  };
+
+  rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_FEATURE, &fd, sizeof (fd), data,
+                  feat_info[feat_id].data_sz);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name,
+              feat_id);
+      return rv;
+    }
+
+  ASSERT (feat_info[feat_id].data_sz > 1);
+
+  log_debug (dev, "get_feature(%s):\n  %U", feat_info[feat_id].name,
+            format_ena_aq_feat_desc, feat_id, data);
+
+  return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_aq_create_sq (vlib_main_t *vm, vnet_dev_t *dev,
+                 ena_aq_create_sq_cmd_t *cmd, ena_aq_create_sq_resp_t *resp)
+{
+  vnet_dev_rv_t rv;
+
+  log_debug (dev, "create_sq_cmd_req:\n  %U", format_ena_aq_create_sq_cmd,
+            cmd);
+
+  rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_SQ, cmd, sizeof (*cmd), resp,
+                  sizeof (*resp));
+
+  if (rv == VNET_DEV_OK)
+    log_debug (dev, "create_sq_cmd_resp:\n  %U", format_ena_aq_create_sq_resp,
+              resp);
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_create_cq (vlib_main_t *vm, vnet_dev_t *dev,
+                 ena_aq_create_cq_cmd_t *cmd, ena_aq_create_cq_resp_t *resp)
+{
+  vnet_dev_rv_t rv;
+
+  log_debug (dev, "create_cq_cmd_req:\n  %U", format_ena_aq_create_cq_cmd,
+            cmd);
+
+  rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_CQ, cmd, sizeof (*cmd), resp,
+                  sizeof (*resp));
+
+  if (rv == VNET_DEV_OK)
+    log_debug (dev, "create_cq_cmd_resp:\n  %U", format_ena_aq_create_cq_resp,
+              resp);
+
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_sq (vlib_main_t *vm, vnet_dev_t *dev,
+                  ena_aq_destroy_sq_cmd_t *cmd)
+{
+  log_debug (dev, "destroy_sq_cmd_req:\n  %U", format_ena_aq_destroy_sq_cmd,
+            cmd);
+
+  return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_SQ, cmd, sizeof (*cmd), 0,
+                    0);
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_cq (vlib_main_t *vm, vnet_dev_t *dev,
+                  ena_aq_destroy_cq_cmd_t *cmd)
+{
+  log_debug (dev, "destroy_cq_cmd_req:\n  %U", format_ena_aq_destroy_cq_cmd,
+            cmd);
+
+  return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_CQ, cmd, sizeof (*cmd), 0,
+                    0);
+}
+
+vnet_dev_rv_t
+ena_aq_get_stats (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_stats_type_t type,
+                 ena_aq_stats_scope_t scope, u16 queue_idx, void *data)
+{
+  vnet_dev_rv_t rv;
+  format_function_t *ff = 0;
+  u8 data_sz[] = {
+    [ENA_ADMIN_STATS_TYPE_BASIC] = sizeof (ena_aq_basic_stats_t),
+    [ENA_ADMIN_STATS_TYPE_EXTENDED] = 0,
+    [ENA_ADMIN_STATS_TYPE_ENI] = sizeof (ena_aq_eni_stats_t),
+  };
+
+  char *type_str[] = {
+#define _(n, s) [n] = #s,
+    foreach_ena_aq_stats_type
+#undef _
+  };
+
+  char *scope_str[] = {
+#define _(n, s) [n] = #s,
+    foreach_ena_aq_stats_scope
+#undef _
+  };
+
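+  /* device_id 0xffff means "my device" */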
+  ena_aq_get_stats_cmd_t cmd = {
+    .type = type,
+    .scope = scope,
+    .queue_idx = scope == ENA_ADMIN_STATS_SCOPE_SPECIFIC_QUEUE ? queue_idx : 0,
+    .device_id = 0xffff,
+  };
+
+  if ((rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_STATS, &cmd, sizeof (cmd),
+                       data, data_sz[type])))
+    {
+      ena_stats_log_err (dev, "get_stats(%s, %s) failed", type_str[type],
+                        scope_str[scope]);
+      return rv;
+    }
+
+  if (type == ENA_ADMIN_STATS_TYPE_BASIC)
+    ff = format_ena_aq_basic_stats;
+  else if (type == ENA_ADMIN_STATS_TYPE_ENI)
+    ff = format_ena_aq_eni_stats;
+
+  if (ff)
+    ena_stats_log_debug (dev, "get_stats(%s, %s, %u):\n  %U", type_str[type],
+                        scope_str[scope], queue_idx, ff, data);
+  else
+    ena_stats_log_debug (dev, "get_stats(%s, %s, %u): unknown data",
+                        type_str[type], scope_str[scope], queue_idx);
+
+  return VNET_DEV_OK;
+}
diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c
new file mode 100644
index 0000000..ead0908
--- /dev/null
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+static ena_aq_host_info_t host_info = {
+  .os_type = 3, /* DPDK */
+  .kernel_ver_str = VPP_BUILD_VER,
+  .os_dist_str = VPP_BUILD_VER,
+  .driver_version = {
+    .major = 16,
+    .minor = 0,
+    .sub_minor = 0,
+  },
+  .ena_spec_version = {
+    .major = 2,
+    .minor = 0,
+  },
+  .driver_supported_features = {
+    .rx_offset = 1,
+    .rss_configurable_function_key = 1,
+  }
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "init",
+};
+
+#define _(f, n, s, d)                                                         \
+  { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+static vlib_error_desc_t ena_rx_node_counters[] = {
+  foreach_ena_rx_node_counter
+};
+static vlib_error_desc_t ena_tx_node_counters[] = {
+  foreach_ena_tx_node_counter
+};
+#undef _
+
+vnet_dev_node_t ena_rx_node = {
+  .error_counters = ena_rx_node_counters,
+  .n_error_counters = ARRAY_LEN (ena_rx_node_counters),
+  .format_trace = format_ena_rx_trace,
+};
+
+vnet_dev_node_t ena_tx_node = {
+  .error_counters = ena_tx_node_counters,
+  .n_error_counters = ARRAY_LEN (ena_tx_node_counters),
+};
+
+static void
+ena_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_aenq_stop (vm, dev);
+  ena_aq_stop (vm, dev);
+}
+
+static vnet_dev_rv_t
+ena_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  vnet_dev_rv_t rv;
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, 4096, 4096,
+                                   (void **) &ed->host_info)))
+    return rv;
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ena_mmio_resp_t), 0,
+                                   (void **) &ed->mmio_resp)))
+    return rv;
+
+  if ((rv = ena_aq_alloc (vm, dev, ENA_ADMIN_QUEUE_DEPTH)))
+    return rv;
+
+  if ((rv = ena_aenq_alloc (vm, dev, ENA_ASYNC_QUEUE_DEPTH)))
+    return rv;
+
+  return VNET_DEV_OK;
+}
+
+static void
+ena_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+
+  ena_aenq_free (vm, dev);
+  ena_aq_free (vm, dev);
+
+  vnet_dev_dma_mem_free (vm, dev, ed->host_info);
+  vnet_dev_dma_mem_free (vm, dev, ed->mmio_resp);
+}
+
+static vnet_dev_rv_t
+ena_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  ena_aq_feat_host_attr_config_t host_attr = {};
+  vlib_pci_config_hdr_t pci_cfg_hdr;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  vnet_dev_port_add_args_t port = {
+    .port = {
+      .attr = {
+        .type = VNET_DEV_PORT_TYPE_ETHERNET,
+      },
+      .ops = {
+        .init = ena_port_init,
+        .start = ena_port_start,
+        .stop = ena_port_stop,
+        .config_change = ena_port_cfg_change,
+        .config_change_validate = ena_port_cfg_change_validate,
+      },
+      .data_size = sizeof (ena_port_t),
+    },
+    .rx_node = &ena_rx_node,
+    .tx_node = &ena_tx_node,
+    .rx_queue = {
+      .config = {
+        .data_size = sizeof (ena_rxq_t),
+        .default_size = 512,
+        .min_size = 32,
+        .size_is_power_of_two = 1,
+      },
+      .ops = {
+        .alloc = ena_rx_queue_alloc,
+        .start = ena_rx_queue_start,
+        .stop = ena_rx_queue_stop,
+        .free = ena_rx_queue_free,
+      },
+    },
+    .tx_queue = {
+      .config = {
+        .data_size = sizeof (ena_txq_t),
+        .default_size = 512,
+        .min_size = 32,
+        .size_is_power_of_two = 1,
+      },
+      .ops = {
+        .alloc = ena_tx_queue_alloc,
+        .start = ena_tx_queue_start,
+        .stop = ena_tx_queue_stop,
+        .free = ena_tx_queue_free,
+      },
+    },
+  };
+
+  if ((rv = vnet_dev_pci_read_config_header (vm, dev, &pci_cfg_hdr)))
+    goto err;
+
+  log_debug (dev, "revision_id 0x%x", pci_cfg_hdr.revision_id);
+
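+  /* bit 0 of the PCI revision id advertises direct register read support;
+   * when it is clear the device is "readless" and register reads must go
+   * through the DMA'd MMIO response buffer */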
+  ed->readless = (pci_cfg_hdr.revision_id & 1) == 0;
+
+  if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ed->reg_bar)))
+    goto err;
+
+  if ((rv = ena_reg_reset (vm, dev, ENA_RESET_REASON_NORMAL)))
+    goto err;
+
+  if ((rv = ena_aq_start (vm, dev)))
+    goto err;
+
+  *ed->host_info = host_info;
+  ed->host_info->num_cpus = vlib_get_n_threads ();
+  ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info);
+
+  if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG,
+                               &host_attr)))
+    return rv;
+
+  if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES,
+                               &ed->dev_attr)))
+    return rv;
+
+  if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT))
+    {
+      ena_aq_feat_max_queue_ext_t max_q_ext;
+      if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT,
+                                   &max_q_ext)))
+       goto err;
+      port.port.attr.max_rx_queues =
+       clib_min (max_q_ext.max_rx_cq_num, max_q_ext.max_rx_sq_num);
+      port.port.attr.max_tx_queues =
+       clib_min (max_q_ext.max_tx_cq_num, max_q_ext.max_tx_sq_num);
+      port.rx_queue.config.max_size =
+       clib_min (max_q_ext.max_rx_cq_depth, max_q_ext.max_rx_sq_depth);
+      port.tx_queue.config.max_size =
+       clib_min (max_q_ext.max_tx_cq_depth, max_q_ext.max_tx_sq_depth);
+    }
+  else
+    {
+      log_err (dev, "device doesn't support MAX_QUEUES_EXT");
+      return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+    }
+
+  if ((rv = ena_aenq_start (vm, dev)))
+    goto err;
+
+  port.port.attr.max_supported_rx_frame_size = ed->dev_attr.max_mtu;
+
+  if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+    port.port.attr.caps.change_max_rx_frame_size = 1;
+
+  vnet_dev_set_hw_addr_eth_mac (&port.port.attr.hw_addr,
+                               ed->dev_attr.mac_addr);
+
+  return vnet_dev_port_add (vm, dev, 0, &port);
+
+err:
+  ena_free (vm, dev);
+  return rv;
+}
+
+static u8 *
+ena_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+  vnet_dev_bus_pci_device_info_t *di = dev_info;
+  const struct
+  {
+    u16 device_id;
+    char *description;
+  } ena_dev_types[] = {
+    { .device_id = 0x0ec2, .description = "Elastic Network Adapter (ENA) PF" },
+    { .device_id = 0xec20, .description = "Elastic Network Adapter (ENA) VF" },
+  };
+
+  if (di->vendor_id != 0x1d0f) /* AMAZON */
+    return 0;
+
+  FOREACH_ARRAY_ELT (dt, ena_dev_types)
+    {
+      if (dt->device_id == di->device_id)
+       return format (0, "%s", dt->description);
+    }
+
+  return 0;
+}
+
+VNET_DEV_REGISTER_DRIVER (ena) = {
+  .name = "ena",
+  .bus = "pci",
+  .device_data_sz = sizeof (ena_device_t),
+  .ops = {
+    .alloc = ena_alloc,
+    .init = ena_init,
+    .deinit = ena_deinit,
+    .free = ena_free,
+    .format_info = format_ena_dev_info,
+    .probe = ena_probe,
+  },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "dev_ena",
+};
diff --git a/src/plugins/dev_ena/ena.h b/src/plugins/dev_ena/ena.h
new file mode 100644
index 0000000..4acb8d9
--- /dev/null
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_H_
+#define _ENA_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <dev_ena/ena_defs.h>
+
+#define ENA_ADMIN_QUEUE_LOG2_DEPTH 2
+#define ENA_ASYNC_QUEUE_LOG2_DEPTH 5
+#define ENA_ADMIN_QUEUE_DEPTH     (1 << ENA_ADMIN_QUEUE_LOG2_DEPTH)
+#define ENA_ASYNC_QUEUE_DEPTH     (1 << ENA_ASYNC_QUEUE_LOG2_DEPTH)
+
+typedef struct
+{
+  u8 readless : 1;
+  u8 aq_started : 1;
+  u8 aenq_started : 1;
+  u8 llq : 1;
+
+  void *reg_bar;
+
+  /* mmio */
+  ena_mmio_resp_t *mmio_resp;
+
+  /* admin queue */
+  struct
+  {
+    ena_aq_sq_entry_t *sq_entries;
+    ena_aq_cq_entry_t *cq_entries;
+    u16 sq_next;
+    u16 cq_head;
+    u16 depth;
+  } aq;
+
+  /* host info */
+  ena_aq_host_info_t *host_info;
+
+  /* device info */
+  ena_aq_feat_device_attr_t dev_attr;
+
+  /* async event notification */
+  struct
+  {
+    ena_aenq_entry_t *entries;
+    u16 head;
+    u16 depth;
+    f64 last_keepalive;
+    u64 tx_drops, tx_drops0;
+    u64 rx_drops, rx_drops0;
+  } aenq;
+
+} ena_device_t;
+
+typedef struct
+{
+} ena_port_t;
+
+typedef struct
+{
+  u32 *buffer_indices;
+  u16 *compl_sqe_indices;
+  ena_rx_desc_t *sqes;
+  ena_rx_cdesc_t *cqes;
+  u32 *sq_db;
+  u32 sq_next;
+  u32 cq_next;
+  u16 cq_idx;
+  u16 sq_idx;
+  u16 n_compl_sqes;
+  u8 cq_created : 1;
+  u8 sq_created : 1;
+} ena_rxq_t;
+
+typedef struct
+{
+  u32 *buffer_indices;
+  ena_tx_desc_t *sqes;
+  ena_tx_llq_desc128_t *llq_descs;
+  ena_tx_cdesc_t *cqes;
+  u64 *sqe_templates;
+  u32 *sq_db;
+  u32 sq_tail;
+  u32 sq_head;
+  u32 cq_next;
+  u16 cq_idx;
+  u16 sq_idx;
+  u8 cq_created : 1;
+  u8 sq_created : 1;
+  u8 llq : 1;
+} ena_txq_t;
+
+typedef struct
+{
+  u16 qid;
+  u16 next_index;
+  u32 hw_if_index;
+  ena_rx_cdesc_status_t status;
+  u16 length;
+  u16 n_desc;
+  u16 req_id;
+} ena_rx_trace_t;
+
+/* admin.c */
+typedef struct
+{
+  char *name;
+  u8 version;
+  u8 data_sz;
+  u8 get;
+  u8 set;
+} ena_aq_feat_info_t;
+
+ena_aq_feat_info_t *ena_aq_get_feat_info (ena_aq_feature_id_t);
+vnet_dev_rv_t ena_aq_alloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t ena_aq_create_sq (vlib_main_t *, vnet_dev_t *,
+                               ena_aq_create_sq_cmd_t *,
+                               ena_aq_create_sq_resp_t *);
+vnet_dev_rv_t ena_aq_create_cq (vlib_main_t *, vnet_dev_t *,
+                               ena_aq_create_cq_cmd_t *,
+                               ena_aq_create_cq_resp_t *);
+vnet_dev_rv_t ena_aq_destroy_sq (vlib_main_t *, vnet_dev_t *,
+                                ena_aq_destroy_sq_cmd_t *);
+vnet_dev_rv_t ena_aq_destroy_cq (vlib_main_t *, vnet_dev_t *,
+                                ena_aq_destroy_cq_cmd_t *);
+vnet_dev_rv_t ena_aq_set_feature (vlib_main_t *, vnet_dev_t *,
+                                 ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_feature (vlib_main_t *, vnet_dev_t *,
+                                 ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_stats (vlib_main_t *, vnet_dev_t *,
+                               ena_aq_stats_type_t, ena_aq_stats_scope_t, u16,
+                               void *);
+
+/* aenq.c */
+vnet_dev_rv_t ena_aenq_alloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aenq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_free (vlib_main_t *, vnet_dev_t *);
+
+/* reg.c */
+void ena_reg_write (vnet_dev_t *, ena_reg_t, void *);
+void ena_reg_read (vnet_dev_t *, ena_reg_t, const void *);
+void ena_reg_set_dma_addr (vlib_main_t *, vnet_dev_t *, u32, u32, void *);
+vnet_dev_rv_t ena_reg_reset (vlib_main_t *, vnet_dev_t *, ena_reset_reason_t);
+
+/* port.c */
+vnet_dev_rv_t ena_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_start (vlib_main_t *, vnet_dev_port_t *);
+void ena_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+                                  vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t ena_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+                                           vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t ena_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t ena_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* format.c */
+format_function_t format_ena_dev_info;
+format_function_t format_ena_mem_addr;
+format_function_t format_ena_tx_desc;
+format_function_t format_ena_rx_trace;
+
+/* format_aq.c */
+format_function_t format_ena_aq_feat_desc;
+format_function_t format_ena_aq_feat_name;
+format_function_t format_ena_aq_opcode;
+format_function_t format_ena_aq_status;
+format_function_t format_ena_aq_feat_id_bitmap;
+format_function_t format_ena_aq_create_sq_cmd;
+format_function_t format_ena_aq_create_cq_cmd;
+format_function_t format_ena_aq_create_sq_resp;
+format_function_t format_ena_aq_create_cq_resp;
+format_function_t format_ena_aq_destroy_sq_cmd;
+format_function_t format_ena_aq_destroy_cq_cmd;
+format_function_t format_ena_aq_basic_stats;
+format_function_t format_ena_aq_eni_stats;
+
+#define foreach_ena_rx_node_counter                                           \
+  _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_RX_NODE_CTR_##f,
+  foreach_ena_rx_node_counter
+#undef _
+    ENA_RX_NODE_N_CTRS,
+} ena_rx_node_ctr_t;
+
+#define foreach_ena_tx_node_counter                                           \
+  _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "buffer chain too long")          \
+  _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_TX_NODE_CTR_##f,
+  foreach_ena_tx_node_counter
+#undef _
+    ENA_TX_NODE_N_CTRS,
+} ena_tx_node_ctr_t;
+
+#define log_debug(dev, f, ...)                                                \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_log.class, "%U" f, format_vnet_dev_log, \
+           (dev), clib_string_skip_prefix (__func__, "ena_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_INFO, ena_log.class, "%U: " f,                     \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...)                                               \
+  vlib_log (VLIB_LOG_LEVEL_NOTICE, ena_log.class, "%U: " f,                   \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_WARNING, ena_log.class, "%U: " f,                  \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_ERR, ena_log.class, "%U: " f,                      \
+           format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+#endif /* _ENA_H_ */
diff --git a/src/plugins/dev_ena/ena_admin_defs.h b/src/plugins/dev_ena/ena_admin_defs.h
new file mode 100644
index 0000000..6433a15
--- /dev/null
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_ADMIN_DEFS_H_
+#define _ENA_ADMIN_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_ena_aq_opcode                                                 \
+  _ (1, CREATE_SQ)                                                            \
+  _ (2, DESTROY_SQ)                                                           \
+  _ (3, CREATE_CQ)                                                            \
+  _ (4, DESTROY_CQ)                                                           \
+  _ (8, GET_FEATURE)                                                          \
+  _ (9, SET_FEATURE)                                                          \
+  _ (11, GET_STATS)
+
+typedef enum
+{
+#define _(v, n) ENA_AQ_OPCODE_##n = (v),
+  foreach_ena_aq_opcode
+#undef _
+} __clib_packed ena_aq_opcode_t;
+
+#define foreach_ena_aq_compl_status                                           \
+  _ (0, SUCCESS)                                                              \
+  _ (1, RESOURCE_ALLOCATION_FAILURE)                                          \
+  _ (2, BAD_OPCODE)                                                           \
+  _ (3, UNSUPPORTED_OPCODE)                                                   \
+  _ (4, MALFORMED_REQUEST)                                                    \
+  _ (5, ILLEGAL_PARAMETER)                                                    \
+  _ (6, UNKNOWN_ERROR)                                                        \
+  _ (7, RESOURCE_BUSY)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_COMPL_STATUS_##n = (v),
+  foreach_ena_aq_compl_status
+#undef _
+} __clib_packed ena_aq_compl_status_t;
+
+/* id, version, get, set, name, struct */
+#define foreach_ena_aq_feature_id                                             \
+  _ (1, 0, 1, 0, DEVICE_ATTRIBUTES, ena_aq_feat_device_attr_t)                \
+  _ (2, 0, 1, 0, MAX_QUEUES_NUM, ena_aq_feat_max_queue_num_t)                 \
+  _ (3, 0, 1, 0, HW_HINTS, ena_aq_feat_hw_hints_t)                            \
+  _ (4, 0, 1, 1, LLQ, ena_aq_feat_llq_t)                                      \
+  _ (5, 0, 1, 0, EXTRA_PROPERTIES_STRINGS,                                    \
+     ena_aq_feat_extra_properties_strings_t)                                  \
+  _ (6, 0, 1, 0, EXTRA_PROPERTIES_FLAGS,                                      \
+     ena_aq_feat_extra_properties_flags_t)                                    \
+  _ (7, 1, 1, 0, MAX_QUEUES_EXT, ena_aq_feat_max_queue_ext_t)                 \
+  _ (10, 0, 1, 1, RSS_HASH_FUNCTION, ena_aq_feat_rss_hash_function_t)         \
+  _ (11, 0, 1, 0, STATELESS_OFFLOAD_CONFIG,                                   \
+     ena_aq_feat_stateless_offload_config_t)                                  \
+  _ (12, 0, 1, 1, RSS_INDIRECTION_TABLE_CONFIG,                               \
+     ena_aq_feat_rss_ind_table_config_t)                                      \
+  _ (14, 0, 0, 1, MTU, ena_aq_feat_mtu_t)                                     \
+  _ (18, 0, 1, 1, RSS_HASH_INPUT, ena_aq_feat_rss_hash_input_t)               \
+  _ (20, 0, 1, 0, INTERRUPT_MODERATION, ena_aq_feat_intr_moder_t)             \
+  _ (26, 0, 1, 1, AENQ_CONFIG, ena_aq_feat_aenq_config_t)                     \
+  _ (27, 0, 1, 0, LINK_CONFIG, ena_aq_feat_link_config_t)                     \
+  _ (28, 0, 0, 1, HOST_ATTR_CONFIG, ena_aq_feat_host_attr_config_t)           \
+  _ (29, 0, 1, 1, PHC_CONFIG, ena_aq_feat_phc_config_t)
+
+typedef enum
+{
+#define _(v, ver, r, w, n, s) ENA_ADMIN_FEAT_ID_##n = (v),
+  foreach_ena_aq_feature_id
+#undef _
+} __clib_packed ena_aq_feature_id_t;
+
+#define foreach_ena_aq_stats_type                                             \
+  _ (0, BASIC)                                                                \
+  _ (1, EXTENDED)                                                             \
+  _ (2, ENI)
+
+#define foreach_ena_aq_stats_scope                                            \
+  _ (0, SPECIFIC_QUEUE)                                                       \
+  _ (1, ETH_TRAFFIC)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_TYPE_##n = (v),
+  foreach_ena_aq_stats_type
+#undef _
+} __clib_packed ena_aq_stats_type_t;
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_SCOPE_##n = (v),
+  foreach_ena_aq_stats_scope
+#undef _
+} __clib_packed ena_aq_stats_scope_t;
+
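+/* device DMA addresses are 48 bits wide, carried as 32 low + 16 high bits */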
+typedef struct
+{
+  u32 addr_lo;
+  u16 addr_hi;
+  u16 _reserved_16;
+} ena_mem_addr_t;
+
+#define foreach_ena_aq_aenq_groups                                            \
+  _ (link_change)                                                             \
+  _ (fatal_error)                                                             \
+  _ (warning)                                                                 \
+  _ (notification)                                                            \
+  _ (keep_alive)                                                              \
+  _ (refresh_capabilities)                                                    \
+  _ (conf_notifications)
+
+typedef union
+{
+  struct
+  {
+#define _(g) u32 g : 1;
+    foreach_ena_aq_aenq_groups
+#undef _
+  };
+  u32 as_u32;
+} ena_aq_aenq_groups_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_aenq_groups_t, 4);
+
+typedef struct
+{
+  u32 length;
+  ena_mem_addr_t addr;
+} ena_aq_aq_ctrl_buff_info_t;
+
+typedef struct
+{
+  u32 impl_id;
+  u32 device_version;
+  u32 supported_features;
+  u32 _reserved3;
+  u32 phys_addr_width;
+  u32 virt_addr_width;
+  u8 mac_addr[6];
+  u8 _reserved7[2];
+  u32 max_mtu;
+} ena_aq_feat_device_attr_t;
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u16 l3_sort : 1;
+      u16 l4_sort : 1;
+    };
+    u16 supported_input_sort;
+  };
+  union
+  {
+    struct
+    {
+      u16 enable_l3_sort : 1;
+      u16 enable_l4_sort : 1;
+    };
+    u16 enabled_input_sort;
+  };
+} ena_aq_feat_rss_hash_input_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_rss_hash_input_t, 4);
+
+typedef struct
+{
+  u16 intr_delay_resolution;
+  u16 reserved;
+} ena_aq_feat_intr_moder_t;
+
+typedef struct
+{
+  ena_aq_aenq_groups_t supported_groups;
+  ena_aq_aenq_groups_t enabled_groups;
+} ena_aq_feat_aenq_config_t;
+
+#define foreach_ena_aq_link_types                                             \
+  _ (0, 1000, 1G)                                                             \
+  _ (1, 2500, 2_5G)                                                           \
+  _ (2, 5000, 5G)                                                             \
+  _ (3, 10000, 10G)                                                           \
+  _ (4, 25000, 25G)                                                           \
+  _ (5, 40000, 40G)                                                           \
+  _ (6, 50000, 50G)                                                           \
+  _ (7, 100000, 100G)                                                         \
+  _ (8, 200000, 200G)                                                         \
+  _ (9, 400000, 400G)
+
+typedef enum
+{
+#define _(b, v, n) ENA_ADMIN_LINK_TYPE_##n = (1U << b),
+  foreach_ena_aq_link_types
+#undef _
+} ena_aq_link_types_t;
+
+typedef struct
+{
+  u32 speed;
+  ena_aq_link_types_t supported;
+  u32 autoneg : 1;
+  u32 duplex : 1;
+} ena_aq_feat_link_config_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_link_config_t, 12);
+
+typedef struct
+{
+  u32 tx;
+  u32 rx_supported;
+  u32 rx_enabled;
+} ena_aq_feat_stateless_offload_config_t;
+
+typedef struct
+{
+  u16 cq_idx;
+  u16 reserved;
+} ena_aq_feat_rss_ind_table_entry_t;
+
+typedef struct
+{
+  u16 min_size;
+  u16 max_size;
+  u16 size;
+  u8 one_entry_update : 1;
+  u8 reserved;
+  u32 inline_index;
+  ena_aq_feat_rss_ind_table_entry_t inline_entry;
+} ena_aq_feat_rss_ind_table_config_t;
+
+typedef struct
+{
+  u32 mtu;
+} ena_aq_feat_mtu_t;
+
+typedef struct
+{
+  u32 count;
+} ena_aq_feat_extra_properties_strings_t;
+
+typedef struct
+{
+  u32 flags;
+} ena_aq_feat_extra_properties_flags_t;
+
+typedef struct
+{
+  u32 max_sq_num;
+  u32 max_sq_depth;
+  u32 max_cq_num;
+  u32 max_cq_depth;
+  u32 max_legacy_llq_num;
+  u32 max_legacy_llq_depth;
+  u32 max_header_size;
+  u16 max_packet_tx_descs;
+  u16 max_packet_rx_descs;
+} ena_aq_feat_max_queue_num_t;
+
+typedef struct
+{
+  u16 mmio_read_timeout;
+  u16 driver_watchdog_timeout;
+  u16 missing_tx_completion_timeout;
+  u16 missed_tx_completion_count_threshold_to_reset;
+  u16 admin_completion_tx_timeout;
+  u16 netdev_wd_timeout;
+  u16 max_tx_sgl_size;
+  u16 max_rx_sgl_size;
+  u16 reserved[8];
+} ena_aq_feat_hw_hints_t;
+
+typedef struct
+{
+  u8 version;
+  u8 _reserved1[3];
+  u32 max_tx_sq_num;
+  u32 max_tx_cq_num;
+  u32 max_rx_sq_num;
+  u32 max_rx_cq_num;
+  u32 max_tx_sq_depth;
+  u32 max_tx_cq_depth;
+  u32 max_rx_sq_depth;
+  u32 max_rx_cq_depth;
+  u32 max_tx_header_size;
+  u16 max_per_packet_tx_descs;
+  u16 max_per_packet_rx_descs;
+} ena_aq_feat_max_queue_ext_t;
+
+typedef struct
+{
+  u32 supported_func;
+  u32 selected_func;
+  u32 init_val;
+} ena_aq_feat_rss_hash_function_t;
+
+typedef struct
+{
+  ena_mem_addr_t os_info_ba;
+  ena_mem_addr_t debug_ba;
+  u32 debug_area_size;
+} ena_aq_feat_host_attr_config_t;
+
+typedef struct
+{
+  u8 type;
+  u8 reserved1[3];
+  u32 doorbell_offset;
+  u32 expire_timeout_usec;
+  u32 block_timeout_usec;
+  ena_mem_addr_t output_address;
+  u32 output_length;
+} ena_aq_feat_phc_config_t;
+
+typedef struct
+{
+  u32 max_llq_num;
+  u32 max_llq_depth;
+  u16 header_location_ctrl_supported;
+  u16 header_location_ctrl_enabled;
+  u16 entry_size_ctrl_supported;
+  u16 entry_size_ctrl_enabled;
+  u16 desc_num_before_header_supported;
+  u16 desc_num_before_header_enabled;
+  u16 descriptors_stride_ctrl_supported;
+  u16 descriptors_stride_ctrl_enabled;
+  union
+  {
+    struct
+    {
+      u16 supported_flags;
+      u16 max_tx_burst_size;
+    } get;
+    struct
+    {
+      u16 enabled_flags;
+    } set;
+  } accel_mode;
+} ena_aq_feat_llq_t;
+
+typedef struct
+{
+  /* feat common */
+  u8 flags;
+  ena_aq_feature_id_t feature_id;
+  u8 feature_version;
+  u8 _reserved;
+} ena_aq_get_set_feature_common_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_get_set_feature_common_desc_t, 4);
+
+typedef struct
+{
+  ena_aq_aq_ctrl_buff_info_t control_buffer;
+  ena_aq_stats_type_t type;
+  ena_aq_stats_scope_t scope;
+  u16 _reserved3;
+  u16 queue_idx;
+  u16 device_id;
+} ena_aq_get_stats_cmd_t;
+STATIC_ASSERT_SIZEOF (ena_aq_get_stats_cmd_t, 20);
+
+typedef enum
+{
+  ENA_ADMIN_SQ_DIRECTION_TX = 1,
+  ENA_ADMIN_SQ_DIRECTION_RX = 2,
+} ena_aq_sq_direction_t;
+
+typedef enum
+{
+  ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST = 1,
+  ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE = 3,
+} ena_aq_sq_placement_policy_t;
+
+typedef enum
+{
+  ENA_ADMIN_SQ_COMPLETION_POLICY_DESC = 0,
+  ENA_ADMIN_SQ_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
+  ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
+  ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD = 3,
+} ena_aq_completion_policy_t;
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u8 _reserved0_0 : 5;
+      u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+    };
+    u8 sq_identity;
+  };
+
+  u8 _reserved1;
+
+  union
+  {
+    struct
+    {
+      u8 placement_policy : 4; /* ena_aq_sq_placement_policy_t */
+      u8 completion_policy : 3; /* ena_aq_completion_policy_t */
+      u8 _reserved2_7 : 1;
+    };
+    u8 sq_caps_2;
+  };
+
+  union
+  {
+    struct
+    {
+      u8 is_physically_contiguous : 1;
+      u8 _reserved3_1 : 7;
+    };
+    u8 sq_caps_3;
+  };
+
+  u16 cq_idx;
+  u16 sq_depth;
+  ena_mem_addr_t sq_ba;
+  ena_mem_addr_t sq_head_writeback; /* used if completion_policy is 2 or 3 */
+  u32 _reserved0_w7;
+  u32 _reserved0_w8;
+} ena_aq_create_sq_cmd_t;
+
+typedef struct
+{
+  u16 sq_idx;
+  u16 _reserved;
+  u32 sq_doorbell_offset;     /* REG BAR offset of queue doorbell */
+  u32 llq_descriptors_offset; /* LLQ MEM BAR offset of descriptors */
+  u32 llq_headers_offset;     /* LLQ MEM BAR offset of headers mem */
+} ena_aq_create_sq_resp_t;
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u8 _reserved0_0 : 5;
+      u8 interrupt_mode_enabled : 1;
+      u8 _reserved0_6 : 2;
+    };
+    u8 cq_caps_1;
+  };
+
+  union
+  {
+    struct
+    {
+      u8 cq_entry_size_words : 4;
+      u8 _reserved1_4 : 4;
+    };
+    u8 cq_caps_2;
+  };
+
+  u16 cq_depth;
+  u32 msix_vector;
+  ena_mem_addr_t cq_ba;
+} ena_aq_create_cq_cmd_t;
+
+typedef struct
+{
+  u16 cq_idx;
+  u16 cq_actual_depth;
+  u32 numa_node_register_offset;
+  u32 cq_head_db_register_offset;
+  u32 cq_interrupt_unmask_register_offset;
+} ena_aq_create_cq_resp_t;
+
+typedef struct
+{
+  u16 sq_idx;
+  union
+  {
+    struct
+    {
+      u8 _reserved : 5;
+      u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+    };
+    u8 sq_identity;
+  };
+  u8 _reserved1;
+} ena_aq_destroy_sq_cmd_t;
+
+typedef struct
+{
+  u16 cq_idx;
+  u16 _reserved1;
+} ena_aq_destroy_cq_cmd_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_cmd_t, 32);
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_cmd_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_sq_cmd_t, 4);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_cq_cmd_t, 4);
+
+typedef struct
+{
+  /* common desc */
+  u16 command_id;
+  ena_aq_opcode_t opcode;
+
+  union
+  {
+    struct
+    {
+      u8 phase : 1;
+      u8 ctrl_data : 1;
+      u8 ctrl_data_indirect : 1;
+      u8 _reserved_3_3 : 5;
+    };
+    u8 flags;
+  };
+
+  u32 data[15];
+} ena_aq_sq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_sq_entry_t, 64);
+
+typedef struct
+{
+  u32 os_type;
+  u8 os_dist_str[128];
+  u32 os_dist;
+  u8 kernel_ver_str[32];
+  u32 kernel_ver;
+
+  struct
+  {
+    u8 major;
+    u8 minor;
+    u8 sub_minor;
+    u8 module_type;
+  } driver_version;
+
+  u32 supported_network_features[2];
+
+  struct
+  {
+    u16 minor : 8;
+    u16 major : 8;
+  } ena_spec_version;
+
+  struct
+  {
+    u16 function : 3;
+    u16 device : 5;
+    u16 bus : 8;
+  } bdf;
+
+  u16 num_cpus;
+  u16 _reserved;
+
+  union
+  {
+    struct
+    {
+      u32 _reserved0 : 1;
+      u32 rx_offset : 1;
+      u32 interrupt_moderation : 1;
+      u32 rx_buf_mirroring : 1;
+      u32 rss_configurable_function_key : 1;
+      u32 _reserved5 : 1;
+      u32 rx_page_reuse : 1;
+      u32 tx_ipv6_csum_offload : 1;
+      u32 _reserved8 : 24;
+    };
+    u32 as_u32;
+  } driver_supported_features;
+
+} ena_aq_host_info_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_host_info_t, 196);
+
+typedef struct
+{
+  union
+  {
+    u64 tx_bytes;
+    struct
+    {
+      u32 tx_bytes_low;
+      u32 tx_bytes_high;
+    };
+  };
+  union
+  {
+    u64 tx_pkts;
+    struct
+    {
+      u32 tx_pkts_low;
+      u32 tx_pkts_high;
+    };
+  };
+  union
+  {
+    u64 rx_bytes;
+    struct
+    {
+      u32 rx_bytes_low;
+      u32 rx_bytes_high;
+    };
+  };
+  union
+  {
+    u64 rx_pkts;
+    struct
+    {
+      u32 rx_pkts_low;
+      u32 rx_pkts_high;
+    };
+  };
+  union
+  {
+    u64 rx_drops;
+    struct
+    {
+      u32 rx_drops_low;
+      u32 rx_drops_high;
+    };
+  };
+  union
+  {
+    u64 tx_drops;
+    struct
+    {
+      u32 tx_drops_low;
+      u32 tx_drops_high;
+    };
+  };
+} ena_aq_basic_stats_t;
+
+#define foreach_ena_aq_basic_counter                                          \
+  _ (rx_pkts, "RX Packets")                                                   \
+  _ (tx_pkts, "TX Packets")                                                   \
+  _ (rx_bytes, "RX Bytes")                                                    \
+  _ (tx_bytes, "TX Bytes")                                                    \
+  _ (rx_drops, "RX Packet Drops")                                             \
+  _ (tx_drops, "TX Packet Drops")
+
+typedef struct
+{
+  u64 bw_in_allowance_exceeded;
+  u64 bw_out_allowance_exceeded;
+  u64 pps_allowance_exceeded;
+  u64 conntrack_allowance_exceeded;
+  u64 linklocal_allowance_exceeded;
+} ena_aq_eni_stats_t;
+
+#define foreach_ena_aq_eni_counter                                            \
+  _ (bw_in_allowance_exceeded, "Input BW Allowance Exceeded")                 \
+  _ (bw_out_allowance_exceeded, "Output BW Allowance Exceeded")               \
+  _ (pps_allowance_exceeded, "PPS Allowance Exceeded")                        \
+  _ (conntrack_allowance_exceeded, "ConnTrack Allowance Exceeded")            \
+  _ (linklocal_allowance_exceeded, "LinkLocal Allowance Exceeded")
+
+typedef struct
+{
+  /* common desc */
+  u16 command;
+  ena_aq_compl_status_t status;
+  union
+  {
+    struct
+    {
+      u8 phase : 1;
+      u8 _reserved3_1 : 7;
+    };
+    u8 flags;
+  };
+  u16 extended_status;
+  u16 sq_head_indx;
+
+  u32 data[14];
+} ena_aq_cq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_cq_entry_t, 64);
+
+#endif /* _ENA_ADMIN_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_aenq_defs.h b/src/plugins/dev_ena/ena_aenq_defs.h
new file mode 100644
index 0000000..4530f5e
--- /dev/null
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_AENQ_DEFS_H_
+#define _ENA_AENQ_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_aenq_group                                                    \
+  _ (0, LINK_CHANGE)                                                          \
+  _ (1, FATAL_ERROR)                                                          \
+  _ (2, WARNING)                                                              \
+  _ (3, NOTIFICATION)                                                         \
+  _ (4, KEEP_ALIVE)                                                           \
+  _ (5, REFRESH_CAPABILITIES)                                                 \
+  _ (6, CONF_NOTIFICATIONS)
+
+#define foreach_aenq_syndrome                                                 \
+  _ (0, SUSPEND)                                                              \
+  _ (1, RESUME)                                                               \
+  _ (2, UPDATE_HINTS)
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_GROUP_##n = (v),
+  foreach_aenq_group
+#undef _
+} ena_aenq_group_t;
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_SYNDROME_##n = (v),
+  foreach_aenq_syndrome
+#undef _
+} ena_aenq_syndrome_t;
+
+typedef struct
+{
+  ena_aenq_group_t group : 16;
+  ena_aenq_syndrome_t syndrome : 16;
+
+  union
+  {
+    struct
+    {
+      u8 phase : 1;
+    };
+    u8 flags;
+  };
+  u8 reserved1[3];
+
+  union
+  {
+    u64 timestamp;
+    struct
+    {
+      u32 timestamp_low;
+      u32 timestamp_high;
+    };
+  };
+
+  union
+  {
+    u32 data[12];
+
+    struct
+    {
+      union
+      {
+       struct
+       {
+         u32 link_status : 1;
+       };
+       u32 flags;
+      };
+    } link_change;
+
+    struct
+    {
+      union
+      {
+       u64 rx_drops;
+       struct
+       {
+         u32 rx_drops_low;
+         u32 rx_drops_high;
+       };
+      };
+
+      union
+      {
+       u64 tx_drops;
+       struct
+       {
+         u32 tx_drops_low;
+         u32 tx_drops_high;
+       };
+      };
+    } keep_alive;
+  };
+} __clib_packed ena_aenq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aenq_entry_t, 64);
+
+#endif /* _ENA_AENQ_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_defs.h b/src/plugins/dev_ena/ena_defs.h
new file mode 100644
index 0000000..1e52ed4
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_DEFS_H_
+#define _ENA_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena_reg_defs.h>
+#include <dev_ena/ena_admin_defs.h>
+#include <dev_ena/ena_aenq_defs.h>
+#include <dev_ena/ena_io_defs.h>
+
+/*
+ * MMIO Response
+ */
+typedef struct
+{
+  u16 req_id;
+  u16 reg_off;
+  u32 reg_val;
+} ena_mmio_resp_t;
+
+#endif /* _ENA_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_inlines.h b/src/plugins/dev_ena/ena_inlines.h
new file mode 100644
index 0000000..106bd5e
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_INLINES_H_
+#define _ENA_INLINES_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena.h>
+
+#define ena_log_is_debug()                                                    \
+  vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_log.class)
+
+#define ena_stats_log_err(dev, f, ...)                                        \
+  vlib_log (VLIB_LOG_LEVEL_ERR, ena_stats_log.class, "%U: " f,                \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_debug(dev, f, ...)                                      \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class, "%U: " f,              \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_is_debug()                                              \
+  vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class)
+
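+/* translate a virtual address into the device's split lo32/hi16 DMA
+ * address representation */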
+static_always_inline void
+ena_set_mem_addr (vlib_main_t *vm, vnet_dev_t *dev, ena_mem_addr_t *m, void *p)
+{
+  u64 pa = vnet_dev_get_dma_addr (vm, dev, p);
+  *m = (ena_mem_addr_t){ .addr_lo = (u32) pa, .addr_hi = (u16) (pa >> 32) };
+}
+
+static_always_inline int
+ena_aq_feature_is_supported (vnet_dev_t *dev, ena_aq_feature_id_t feat_id)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  return (ed->dev_attr.supported_features & (1U << feat_id)) != 0;
+}
+
+#endif /* _ENA_INLINES_H_ */
diff --git a/src/plugins/dev_ena/ena_io_defs.h b/src/plugins/dev_ena/ena_io_defs.h
new file mode 100644
index 0000000..89ca2ac
--- /dev/null
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_IO_DEFS_H_
+#define _ENA_IO_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/vector.h>
+
+typedef struct
+{
+  u16 length; /* 0 = 64K */
+  u8 reserved2;
+  union
+  {
+    struct
+    {
+      u8 phase : 1;
+      u8 reserved1 : 1;
+      u8 first : 1;    /* first descriptor in transaction */
+      u8 last : 1;     /* last descriptor in transaction */
+      u8 comp_req : 1; /* should completion be posted? */
+      u8 reserved5 : 1;
+      u8 reserved67 : 2;
+    };
+    u8 ctrl;
+  };
+  u16 req_id;
+  u16 reserved6;
+} ena_rx_desc_lo_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_lo_t, 8);
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      ena_rx_desc_lo_t lo;
+      u32 buff_addr_lo;
+      u16 buff_addr_hi;
+      u16 reserved16_w3;
+    };
+    u64x2 as_u64x2;
+  };
+} ena_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_t, 16);
+
+#define foreach_ena_rx_cdesc_status                                           \
+  _ (5, l3_proto_idx)                                                         \
+  _ (2, src_vlan_cnt)                                                         \
+  _ (1, _reserved7)                                                           \
+  _ (5, l4_proto_idx)                                                         \
+  _ (1, l3_csum_err)                                                          \
+  _ (1, l4_csum_err)                                                          \
+  _ (1, ipv4_frag)                                                            \
+  _ (1, l4_csum_checked)                                                      \
+  _ (7, _reserved17)                                                          \
+  _ (1, phase)                                                                \
+  _ (1, l3_csum2)                                                             \
+  _ (1, first)                                                                \
+  _ (1, last)                                                                 \
+  _ (2, _reserved28)                                                          \
+  _ (1, buffer)                                                               \
+  _ (1, _reserved31)
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+#define _(b, n) u32 n : (b);
+      foreach_ena_rx_cdesc_status
+#undef _
+    };
+    u32 as_u32;
+  };
+} ena_rx_cdesc_status_t;
+
+typedef struct
+{
+  ena_rx_cdesc_status_t status;
+  u16 length;
+  u16 req_id;
+  u32 hash;
+  u16 sub_qid;
+  u8 offset;
+  u8 reserved;
+} ena_rx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_cdesc_t, 16);
+
+#define foreach_ena_tx_desc                                                   \
+  /* len_ctrl */                                                              \
+  _ (16, length)                                                              \
+  _ (6, req_id_hi)                                                            \
+  _ (1, _reserved0_22)                                                        \
+  _ (1, meta_desc)                                                            \
+  _ (1, phase)                                                                \
+  _ (1, _reserved0_25)                                                        \
+  _ (1, first)                                                                \
+  _ (1, last)                                                                 \
+  _ (1, comp_req)                                                             \
+  _ (2, _reserved0_29)                                                        \
+  _ (1, _reserved0_31)                                                        \
+  /* meta_ctrl */                                                             \
+  _ (4, l3_proto_idx)                                                         \
+  _ (1, df)                                                                   \
+  _ (2, _reserved1_5)                                                         \
+  _ (1, tso_en)                                                               \
+  _ (5, l4_proto_idx)                                                         \
+  _ (1, l3_csum_en)                                                           \
+  _ (1, l4_csum_en)                                                           \
+  _ (1, ethernet_fcs_dis)                                                     \
+  _ (1, _reserved1_16)                                                        \
+  _ (1, l4_csum_partial)                                                      \
+  _ (3, _reserved_1_18)                                                       \
+  _ (1, _reserved_1_21)                                                       \
+  _ (10, req_id_lo)
+
+typedef struct
+{
+  union
+  {
+    struct
+    {
+#define _(b, n) u32 n : (b);
+      foreach_ena_tx_desc
+#undef _
+       u32 buff_addr_lo;
+      u16 buff_addr_hi;
+      u8 _reserved3_16;
+      u8 header_length;
+    };
+
+    u16x8 as_u16x8;
+    u32x4 as_u32x4;
+    u64x2 as_u64x2;
+  };
+} ena_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_desc_t, 16);
+
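+/* 128-byte LLQ entry: two TX descriptors plus a 96-byte inline data
+   area, written directly to device memory when the TX SQ uses the
+   DEVICE placement policy (see ena_tx_queue_start ()) */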
+typedef struct
+{
+  ena_tx_desc_t desc[2];
+  u8 data[96];
+} __clib_aligned (128)
+ena_tx_llq_desc128_t;
+STATIC_ASSERT_SIZEOF (ena_tx_llq_desc128_t, 128);
+
+typedef union
+{
+  struct
+  {
+    u16 req_id;
+    u8 status;
+    union
+    {
+      struct
+      {
+       u8 phase : 1;
+      };
+      u8 flags;
+    };
+    u16 sub_qid;
+    u16 sq_head_idx;
+  };
+  u64 as_u64;
+} ena_tx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_cdesc_t, 8);
+
+#endif /* _ENA_IO_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_reg_defs.h b/src/plugins/dev_ena/ena_reg_defs.h
new file mode 100644 (file)
index 0000000..11d458e
--- /dev/null
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_REG_DEFS_H_
+#define _ENA_REG_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define ena_reg_version_t_fields                                              \
+  __ (8, minor)                                                               \
+  __ (8, major)
+
+#define ena_reg_controller_version_t_fields                                   \
+  __ (8, subminor)                                                            \
+  __ (8, minor)                                                               \
+  __ (8, major)                                                               \
+  __ (8, impl_id)
+
+#define ena_reg_caps_t_fields                                                 \
+  __ (1, contiguous_queue_required)                                           \
+  __ (5, reset_timeout)                                                       \
+  __ (2, _unused)                                                             \
+  __ (8, dma_addr_width)                                                      \
+  __ (4, admin_cmd_to)
+
+#define ena_reg_aq_caps_t_fields                                              \
+  __ (16, depth)                                                              \
+  __ (16, entry_size)
+
+#define ena_reg_acq_caps_t_fields                                             \
+  __ (16, depth)                                                              \
+  __ (16, entry_size)
+
+#define ena_reg_aenq_caps_t_fields                                            \
+  __ (16, depth)                                                              \
+  __ (16, entry_size)
+
+#define ena_reg_dev_ctl_t_fields                                              \
+  __ (1, dev_reset)                                                           \
+  __ (1, aq_restart)                                                          \
+  __ (1, quiescent)                                                           \
+  __ (1, io_resume)                                                           \
+  __ (24, _unused)                                                            \
+  __ (4, reset_reason)
+
+#define ena_reg_dev_sts_t_fields                                              \
+  __ (1, ready)                                                               \
+  __ (1, aq_restart_in_progress)                                              \
+  __ (1, aq_restart_finished)                                                 \
+  __ (1, reset_in_progress)                                                   \
+  __ (1, reset_finished)                                                      \
+  __ (1, fatal_error)                                                         \
+  __ (1, quiescent_state_in_progress)                                         \
+  __ (1, quiescent_state_achieved)
+
+#define ena_reg_mmio_reg_read_t_fields                                        \
+  __ (16, req_id)                                                             \
+  __ (16, reg_off)
+
+#define ena_reg_rss_ind_entry_update_t_fields                                 \
+  __ (16, index)                                                              \
+  __ (16, cx_idx)
+
+#define __(l, f) u32 f : l;
+#define _(n)                                                                  \
+  typedef union                                                               \
+  {                                                                           \
+    struct                                                                    \
+    {                                                                         \
+      n##_fields;                                                             \
+    };                                                                        \
+    u32 as_u32;                                                               \
+  } n;
+
+_ (ena_reg_version_t)
+_ (ena_reg_controller_version_t)
+_ (ena_reg_caps_t)
+_ (ena_reg_aq_caps_t)
+_ (ena_reg_acq_caps_t)
+_ (ena_reg_aenq_caps_t)
+_ (ena_reg_dev_ctl_t)
+_ (ena_reg_dev_sts_t)
+_ (ena_reg_mmio_reg_read_t)
+_ (ena_reg_rss_ind_entry_update_t)
+#undef _
+#undef __
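+
+/* each invocation of _ () above expands to a bitfield/u32 union, e.g.
+ * for ena_reg_version_t:
+ *
+ *   typedef union
+ *   {
+ *     struct
+ *     {
+ *       u32 minor : 8;
+ *       u32 major : 8;
+ *     };
+ *     u32 as_u32;
+ *   } ena_reg_version_t;
+ */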
+
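+/* (offset, readable, register name, bitfield list); the second column
+   marks registers that can safely be read back and is used by
+   format_ena_regs () to decide which registers to dump */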
+#define foreach_ena_reg                                                       \
+  _ (0x00, 1, VERSION, ena_reg_version_t_fields)                              \
+  _ (0x04, 1, CONTROLLER_VERSION, ena_reg_controller_version_t_fields)        \
+  _ (0x08, 1, CAPS, ena_reg_caps_t_fields)                                    \
+  _ (0x0c, 1, EXT_CAPS, )                                                     \
+  _ (0x10, 1, AQ_BASE_LO, )                                                   \
+  _ (0x14, 1, AQ_BASE_HI, )                                                   \
+  _ (0x18, 1, AQ_CAPS, ena_reg_aq_caps_t_fields)                              \
+  _ (0x20, 1, ACQ_BASE_LO, )                                                  \
+  _ (0x24, 1, ACQ_BASE_HI, )                                                  \
+  _ (0x28, 1, ACQ_CAPS, ena_reg_acq_caps_t_fields)                            \
+  _ (0x2c, 0, AQ_DB, )                                                        \
+  _ (0x30, 0, ACQ_TAIL, )                                                     \
+  _ (0x34, 1, AENQ_CAPS, ena_reg_aenq_caps_t_fields)                          \
+  _ (0x38, 0, AENQ_BASE_LO, )                                                 \
+  _ (0x3c, 0, AENQ_BASE_HI, )                                                 \
+  _ (0x40, 0, AENQ_HEAD_DB, )                                                 \
+  _ (0x44, 0, AENQ_TAIL, )                                                    \
+  _ (0x4c, 1, INTR_MASK, )                                                    \
+  _ (0x54, 0, DEV_CTL, ena_reg_dev_ctl_t_fields)                              \
+  _ (0x58, 1, DEV_STS, ena_reg_dev_sts_t_fields)                              \
+  _ (0x5c, 0, MMIO_REG_READ, ena_reg_mmio_reg_read_t_fields)                  \
+  _ (0x60, 0, MMIO_RESP_LO, )                                                 \
+  _ (0x64, 0, MMIO_RESP_HI, )                                                 \
+  _ (0x68, 0, RSS_IND_ENTRY_UPDATE, ena_reg_rss_ind_entry_update_t_fields)
+
+typedef enum
+{
+#define _(o, r, n, f) ENA_REG_##n = o,
+  foreach_ena_reg
+#undef _
+} ena_reg_t;
+
+#define foreach_ena_reset_reason                                              \
+  _ (0, NORMAL)                                                               \
+  _ (1, KEEP_ALIVE_TO)                                                        \
+  _ (2, ADMIN_TO)                                                             \
+  _ (3, MISS_TX_CMPL)                                                         \
+  _ (4, INV_RX_REQ_ID)                                                        \
+  _ (5, INV_TX_REQ_ID)                                                        \
+  _ (6, TOO_MANY_RX_DESCS)                                                    \
+  _ (7, INIT_ERR)                                                             \
+  _ (8, DRIVER_INVALID_STATE)                                                 \
+  _ (9, OS_TRIGGER)                                                           \
+  _ (10, OS_NETDEV_WD)                                                        \
+  _ (11, SHUTDOWN)                                                            \
+  _ (12, USER_TRIGGER)                                                        \
+  _ (13, GENERIC)                                                             \
+  _ (14, MISS_INTERRUPT)                                                      \
+  _ (15, SUSPECTED_POLL_STARVATION)                                           \
+  _ (16, RX_DESCRIPTOR_MALFORMED)                                             \
+  _ (17, TX_DESCRIPTOR_MALFORMED)
+
+typedef enum
+{
+#define _(o, n) ENA_RESET_REASON_##n = o,
+  foreach_ena_reset_reason
+#undef _
+} ena_reset_reason_t;
+
+#endif /* _ENA_REG_DEFS_H_ */
diff --git a/src/plugins/dev_ena/format.c b/src/plugins/dev_ena/format.c
new file mode 100644 (file)
index 0000000..2db52b5
--- /dev/null
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/pci/pci.h>
+#include <vnet/error.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_defs.h>
+
+u8 *
+format_ena_dev_info (u8 *s, va_list *args)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u32 indent = format_get_indent (s) + 2;
+
+  format (s, "Elastic Network Adapter:");
+  format (s, "\n%UDevice version is %u, implementation id is %u",
+         format_white_space, indent, ed->dev_attr.device_version,
+         ed->dev_attr.impl_id);
+  format (s, "\n%Urx drops %lu, tx drops %lu", format_white_space, indent,
+         ed->aenq.rx_drops, ed->aenq.tx_drops);
+  format (s, "\n%ULast keepalive arrived ", format_white_space, indent);
+  if (ed->aenq.last_keepalive != 0.0)
+    format (s, "%.2f seconds ago",
+           vlib_time_now (vm) - ed->aenq.last_keepalive);
+  else
+    format (s, "never");
+  return s;
+}
+
+u8 *
+format_ena_mem_addr (u8 *s, va_list *args)
+{
+  ena_mem_addr_t *ema = va_arg (*args, ena_mem_addr_t *);
+  return format (s, "0x%lx", (u64) ema->addr_hi << 32 | ema->addr_lo);
+}
+
+u8 *
+format_ena_tx_desc (u8 *s, va_list *args)
+{
+  ena_tx_desc_t *d = va_arg (*args, ena_tx_desc_t *);
+  s =
+    format (s, "addr 0x%012lx", (u64) d->buff_addr_hi << 32 | d->buff_addr_lo);
+  s = format (s, " len %u", d->length);
+  s = format (s, " req_id 0x%x", d->req_id_lo | d->req_id_hi << 10);
+  if (d->header_length)
+    s = format (s, " hdr_len %u", d->header_length);
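+  /* print any remaining named fields narrower than 6 bits which are set;
+     reserved ('_'-prefixed) fields are skipped */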
+#define _(v, n)                                                               \
+  if ((v) < 6 && #n[0] != '_' && d->n)                                        \
+    s = format (s, " " #n " %u", d->n);
+  foreach_ena_tx_desc
+#undef _
+    return s;
+}
+
+u8 *
+format_ena_rx_desc_status (u8 *s, va_list *args)
+{
+  ena_rx_cdesc_status_t st = va_arg (*args, ena_rx_cdesc_status_t);
+  s = format (s, "0x%x", st.as_u32);
+  if (st.as_u32 != 0)
+    {
+      int not_first_line = 0;
+      s = format (s, " -> ");
+#define _(b, n)                                                               \
+  if (st.n)                                                                   \
+    s = format (s, "%s%s %u", not_first_line++ ? ", " : "", #n, st.n);
+      foreach_ena_rx_cdesc_status
+#undef _
+    }
+  return s;
+}
+
+u8 *
+format_ena_rx_trace (u8 *s, va_list *args)
+{
+  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t *node = va_arg (*args, vlib_node_t *);
+  ena_rx_trace_t *t = va_arg (*args, ena_rx_trace_t *);
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+  u32 indent = format_get_indent (s);
+
+  s = format (
+    s, "ena: %v (%d) qid %u next-node %U length %u req-id 0x%x n-desc %u",
+    hi->name, t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+    node->index, t->next_index, t->length, t->req_id, t->n_desc);
+  s = format (s, "\n%Ustatus: %U", format_white_space, indent + 2,
+             format_ena_rx_desc_status, t->status);
+  return s;
+}
+
+u8 *
+format_ena_regs (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  int offset = va_arg (*args, int);
+  u32 indent = format_get_indent (s);
+  u32 rv = 0, f, v;
+  u8 *s2 = 0;
+
+#define _(o, r, rn, m)                                                        \
+  if ((offset == -1 || offset == o) && r == 1)                                \
+    {                                                                         \
+      s = format (s, "\n%U", format_white_space, indent);                     \
+      vec_reset_length (s2);                                                  \
+      s2 = format (s2, "[0x%02x] %s:", o, #rn);                               \
+      ena_reg_read (dev, o, &rv);                                             \
+      s = format (s, "%-34v = 0x%08x", s2, rv);                               \
+      f = 0;                                                                  \
+      m                                                                       \
+    }
+
+#define __(l, fn)                                                             \
+  if (#fn[0] != '_')                                                          \
+    {                                                                         \
+      vec_reset_length (s2);                                                  \
+      s2 = format (s2, "\n%U", format_white_space, indent);                   \
+      s2 = format (s2, "  [%2u:%2u] %s", f + l - 1, f, #fn);                  \
+      s = format (s, "  %-35v = ", s2);                                       \
+      v = (rv >> f) & pow2_mask (l);                                          \
+      if (l < 3)                                                              \
+       s = format (s, "%u", v);                                              \
+      else if (l <= 8)                                                        \
+       s = format (s, "0x%02x (%u)", v, v);                                  \
+      else if (l <= 16)                                                       \
+       s = format (s, "0x%04x", v);                                          \
+      else                                                                    \
+       s = format (s, "0x%08x", v);                                          \
+    }                                                                         \
+  f += l;
+
+  foreach_ena_reg;
+#undef _
+#undef __
+
+  vec_free (s2);
+
+  return s;
+}
diff --git a/src/plugins/dev_ena/format_aq.c b/src/plugins/dev_ena/format_aq.c
new file mode 100644 (file)
index 0000000..18bad1e
--- /dev/null
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+static char *opcode_names[] = {
+#define _(v, s) [v] = #s,
+  foreach_ena_aq_opcode
+#undef _
+};
+
+static char *status_names[] = {
+#define _(v, s) [v] = #s,
+  foreach_ena_aq_compl_status
+#undef _
+};
+
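+/* maximum value representable in bitfield member f of struct pointer s,
+   obtained by setting the field to all ones; used to pick print width */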
+#define __maxval(s, f) (u64) (((typeof ((s)[0])){ .f = -1LL }).f)
+
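+/* the helper macros below rely on 'indent' and 'line' locals in the
+   calling function; __name emits one "name: " label per line */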
+#define __name(s, n)                                                          \
+  {                                                                           \
+    s = format (s, "%s%U%-32s: ", line ? "\n" : "", format_white_space,       \
+               line ? indent : 0, #n);                                       \
+    line++;                                                                   \
+  }
+
+#define _format_number(s, d, n, ...)                                          \
+  {                                                                           \
+    __name (s, n);                                                            \
+    if (d->n < 10)                                                            \
+      s = format (s, "%u", d->n);                                             \
+    else if (__maxval (d, n) <= 255)                                          \
+      s = format (s, "0x%02x (%u)", d->n, d->n);                              \
+    else if (__maxval (d, n) <= 65535)                                        \
+      s = format (s, "0x%04x (%u)", d->n, d->n);                              \
+    else                                                                      \
+      s = format (s, "0x%08x (%u)", d->n, d->n);                              \
+  }
+
+#define _format_with_fn_and_ptr(s, c, n, f)                                   \
+  {                                                                           \
+    __name (s, n);                                                            \
+    s = format (s, "%U", f, &((c)->n));                                       \
+  }
+
+#define _format_with_fn_and_val(s, c, n, f)                                   \
+  {                                                                           \
+    __name (s, n);                                                            \
+    s = format (s, "%U", f, (c)->n);                                          \
+  }
+#define _format_ena_memory(s, c, n)                                           \
+  _format_with_fn_and_ptr (s, c, n, format_ena_mem_addr)
+
+u8 *
+format_ena_aq_opcode (u8 *s, va_list *args)
+{
+  u32 opcode = va_arg (*args, u32);
+
+  if (opcode >= ARRAY_LEN (opcode_names) || opcode_names[opcode] == 0)
+    return format (s, "UNKNOWN(%u)", opcode);
+  return format (s, "%s", opcode_names[opcode]);
+}
+
+u8 *
+format_ena_aq_status (u8 *s, va_list *args)
+{
+  u32 status = va_arg (*args, u32);
+
+  if (status >= ARRAY_LEN (status_names) || status_names[status] == 0)
+    return format (s, "UNKNOWN(%u)", status);
+  return format (s, "%s", status_names[status]);
+}
+
+u8 *
+format_ena_aq_aenq_groups (u8 *s, va_list *args)
+{
+  ena_aq_aenq_groups_t g = va_arg (*args, ena_aq_aenq_groups_t);
+  u32 i, not_first = 0;
+  u32 indent = format_get_indent (s);
+
+#define _(x)                                                                  \
+  if (g.x)                                                                    \
+    {                                                                         \
+      if (format_get_indent (s) > 80)                                         \
+       s = format (s, "\n%U", format_white_space, indent);                   \
+      s = format (s, "%s%s", not_first++ ? " " : "", #x);                     \
+      g.x = 0;                                                                \
+    }
+  foreach_ena_aq_aenq_groups;
+#undef _
+
+  foreach_set_bit_index (i, g.as_u32)
+    s = format (s, "%sunknown-%u", not_first++ ? " " : "", i);
+
+  return s;
+}
+
+u8 *
+format_ena_aq_feat_id_bitmap (u8 *s, va_list *args)
+{
+  u32 bmp = va_arg (*args, u32);
+  int i, line = 0;
+  u32 indent = format_get_indent (s);
+
+  foreach_set_bit_index (i, bmp)
+    {
+      ena_aq_feat_info_t *info = ena_aq_get_feat_info (i);
+      if (line++)
+       s = format (s, ", ");
+      if (format_get_indent (s) > 80)
+       s = format (s, "\n%U", format_white_space, indent);
+      if (info)
+       s = format (s, "%s", info->name);
+      else
+       s = format (s, "unknown-%u", i);
+    }
+
+  return s;
+}
+
+u8 *
+format_ena_aq_feat_name (u8 *s, va_list *args)
+{
+  ena_aq_feature_id_t feat_id = va_arg (*args, int);
+  char *feat_names[] = {
+#define _(v, r, gt, st, s, u) [v] = #s,
+    foreach_ena_aq_feature_id
+#undef _
+  };
+
+  if (feat_id >= ARRAY_LEN (feat_names) || feat_names[feat_id] == 0)
+    return format (s, "UNKNOWN(%u)", feat_id);
+  return format (s, "%s", feat_names[feat_id]);
+}
+
+u8 *
+format_ena_aq_feat_desc (u8 *s, va_list *args)
+{
+  ena_aq_feature_id_t feat_id = va_arg (*args, int);
+  void *data = va_arg (*args, void *);
+  ena_aq_feat_info_t *info = ena_aq_get_feat_info (feat_id);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  switch (feat_id)
+    {
+    case ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES:
+      {
+       ena_aq_feat_device_attr_t *d = data;
+       _format_number (s, d, impl_id);
+       _format_number (s, d, device_version);
+       _format_number (s, d, phys_addr_width);
+       _format_number (s, d, virt_addr_width);
+       _format_with_fn_and_val (s, d, mac_addr, format_ethernet_address);
+       _format_number (s, d, max_mtu);
+       _format_with_fn_and_val (s, d, supported_features,
+                                format_ena_aq_feat_id_bitmap);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_AENQ_CONFIG:
+      {
+       ena_aq_feat_aenq_config_t *d = data;
+       _format_with_fn_and_val (s, d, supported_groups,
+                                format_ena_aq_aenq_groups);
+       _format_with_fn_and_val (s, d, enabled_groups,
+                                format_ena_aq_aenq_groups);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_INTERRUPT_MODERATION:
+      {
+       ena_aq_feat_intr_moder_t *d = data;
+       _format_number (s, d, intr_delay_resolution);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_STATELESS_OFFLOAD_CONFIG:
+      {
+       ena_aq_feat_stateless_offload_config_t *d = data;
+       _format_number (s, d, rx_supported);
+       _format_number (s, d, rx_enabled);
+       _format_number (s, d, tx);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_RSS_INDIRECTION_TABLE_CONFIG:
+      {
+       ena_aq_feat_rss_ind_table_config_t *d = data;
+       _format_number (s, d, min_size);
+       _format_number (s, d, max_size);
+       _format_number (s, d, size);
+       _format_number (s, d, one_entry_update);
+       _format_number (s, d, inline_index);
+       _format_number (s, d, inline_entry.cq_idx);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_MAX_QUEUES_NUM:
+      {
+       ena_aq_feat_max_queue_num_t *d = data;
+       _format_number (s, d, max_sq_num);
+       _format_number (s, d, max_sq_depth);
+       _format_number (s, d, max_cq_num);
+       _format_number (s, d, max_cq_depth);
+       _format_number (s, d, max_legacy_llq_num);
+       _format_number (s, d, max_legacy_llq_depth);
+       _format_number (s, d, max_header_size);
+       _format_number (s, d, max_packet_tx_descs);
+       _format_number (s, d, max_packet_rx_descs);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT:
+      {
+       ena_aq_feat_max_queue_ext_t *d = data;
+       _format_number (s, d, max_rx_sq_num);
+       _format_number (s, d, max_rx_cq_num);
+       _format_number (s, d, max_tx_sq_num);
+       _format_number (s, d, max_tx_cq_num);
+       _format_number (s, d, max_rx_sq_depth);
+       _format_number (s, d, max_rx_cq_depth);
+       _format_number (s, d, max_tx_sq_depth);
+       _format_number (s, d, max_tx_cq_depth);
+       _format_number (s, d, version);
+       _format_number (s, d, max_tx_header_size);
+       _format_number (s, d, max_per_packet_tx_descs);
+       _format_number (s, d, max_per_packet_rx_descs);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_RSS_HASH_FUNCTION:
+      {
+       ena_aq_feat_rss_hash_function_t *d = data;
+       _format_number (s, d, supported_func);
+       _format_number (s, d, selected_func);
+       _format_number (s, d, init_val);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_LLQ:
+      {
+       ena_aq_feat_llq_t *d = data;
+       _format_number (s, d, max_llq_num);
+       _format_number (s, d, max_llq_depth);
+       _format_number (s, d, header_location_ctrl_supported);
+       _format_number (s, d, header_location_ctrl_enabled);
+       _format_number (s, d, entry_size_ctrl_supported);
+       _format_number (s, d, entry_size_ctrl_enabled);
+       _format_number (s, d, desc_num_before_header_supported);
+       _format_number (s, d, desc_num_before_header_enabled);
+       _format_number (s, d, descriptors_stride_ctrl_supported);
+       _format_number (s, d, descriptors_stride_ctrl_enabled);
+       _format_number (s, d, accel_mode.get.supported_flags);
+       _format_number (s, d, accel_mode.get.max_tx_burst_size);
+       _format_number (s, d, accel_mode.set.enabled_flags);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_STRINGS:
+      {
+       ena_aq_feat_extra_properties_strings_t *d = data;
+       _format_number (s, d, count);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_FLAGS:
+      {
+       ena_aq_feat_extra_properties_flags_t *d = data;
+       _format_number (s, d, flags);
+      }
+      break;
+
+    case ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG:
+      {
+       ena_aq_feat_host_attr_config_t *d = data;
+       _format_ena_memory (s, d, os_info_ba);
+       _format_ena_memory (s, d, debug_ba);
+       _format_number (s, d, debug_area_size);
+      }
+      break;
+
+    default:
+      if (info)
+       s = format (s, "%U", format_hexdump, data, info->data_sz);
+      break;
+    }
+
+  return s;
+}
+
+u8 *
+format_ena_aq_create_sq_cmd (u8 *s, va_list *args)
+{
+  ena_aq_create_sq_cmd_t *cmd = va_arg (*args, ena_aq_create_sq_cmd_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, cmd, sq_direction);
+  _format_number (s, cmd, placement_policy);
+  _format_number (s, cmd, completion_policy);
+  _format_number (s, cmd, is_physically_contiguous);
+  _format_number (s, cmd, cq_idx);
+  _format_number (s, cmd, sq_depth);
+  _format_ena_memory (s, cmd, sq_ba);
+  _format_ena_memory (s, cmd, sq_head_writeback);
+  return s;
+}
+
+u8 *
+format_ena_aq_create_cq_cmd (u8 *s, va_list *args)
+{
+  ena_aq_create_cq_cmd_t *cmd = va_arg (*args, ena_aq_create_cq_cmd_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, cmd, interrupt_mode_enabled);
+  _format_number (s, cmd, cq_entry_size_words);
+  _format_number (s, cmd, cq_depth);
+  _format_number (s, cmd, msix_vector);
+  _format_ena_memory (s, cmd, cq_ba);
+  return s;
+}
+
+u8 *
+format_ena_aq_create_sq_resp (u8 *s, va_list *args)
+{
+  ena_aq_create_sq_resp_t *resp = va_arg (*args, ena_aq_create_sq_resp_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, resp, sq_idx);
+  _format_number (s, resp, sq_doorbell_offset);
+  _format_number (s, resp, llq_descriptors_offset);
+  _format_number (s, resp, llq_headers_offset);
+  return s;
+}
+
+u8 *
+format_ena_aq_create_cq_resp (u8 *s, va_list *args)
+{
+  ena_aq_create_cq_resp_t *resp = va_arg (*args, ena_aq_create_cq_resp_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, resp, cq_idx);
+  _format_number (s, resp, cq_actual_depth);
+  _format_number (s, resp, numa_node_register_offset);
+  _format_number (s, resp, cq_head_db_register_offset);
+  _format_number (s, resp, cq_interrupt_unmask_register_offset);
+  return s;
+}
+
+u8 *
+format_ena_aq_destroy_sq_cmd (u8 *s, va_list *args)
+{
+  ena_aq_destroy_sq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_sq_cmd_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, cmd, sq_idx);
+  _format_number (s, cmd, sq_direction);
+  return s;
+}
+
+u8 *
+format_ena_aq_destroy_cq_cmd (u8 *s, va_list *args)
+{
+  ena_aq_destroy_cq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_cq_cmd_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, cmd, cq_idx);
+  return s;
+}
+
+u8 *
+format_ena_aq_basic_stats (u8 *s, va_list *args)
+{
+  ena_aq_basic_stats_t *st = va_arg (*args, ena_aq_basic_stats_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, st, tx_bytes);
+  _format_number (s, st, tx_pkts);
+  _format_number (s, st, rx_bytes);
+  _format_number (s, st, rx_pkts);
+  _format_number (s, st, rx_drops);
+  _format_number (s, st, tx_drops);
+  return s;
+}
+
+u8 *
+format_ena_aq_eni_stats (u8 *s, va_list *args)
+{
+  ena_aq_eni_stats_t *st = va_arg (*args, ena_aq_eni_stats_t *);
+  u32 indent = format_get_indent (s);
+  u32 line = 0;
+
+  _format_number (s, st, bw_in_allowance_exceeded);
+  _format_number (s, st, bw_out_allowance_exceeded);
+  _format_number (s, st, pps_allowance_exceeded);
+  _format_number (s, st, conntrack_allowance_exceeded);
+  _format_number (s, st, linklocal_allowance_exceeded);
+  return s;
+}
diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c
new file mode 100644 (file)
index 0000000..2b26fef
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "port",
+};
+
+vnet_dev_rv_t
+ena_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+
+  log_debug (dev, "port %u", port->port_id);
+
+  return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv;
+
+  log_debug (dev, "port start: port %u", port->port_id);
+
+  if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+    {
+      ena_aq_feat_mtu_t mtu = { .mtu = port->max_rx_frame_size };
+
+      if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_MTU, &mtu)))
+       return rv;
+    }
+
+  if ((rv = vnet_dev_port_start_all_rx_queues (vm, port)))
+    return rv;
+
+  if ((rv = vnet_dev_port_start_all_tx_queues (vm, port)))
+    return rv;
+
+  return VNET_DEV_OK;
+}
+
+void
+ena_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  log_debug (port->dev, "port stop: port %u", port->port_id);
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+                             vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+      if (port->started)
+       rv = VNET_DEV_ERR_PORT_STARTED;
+      break;
+
+    default:
+      rv = VNET_DEV_ERR_NOT_SUPPORTED;
+    };
+
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                    vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+      break;
+
+    default:
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    };
+
+  return rv;
+}
diff --git a/src/plugins/dev_ena/queue.c b/src/plugins/dev_ena/queue.c
new file mode 100644 (file)
index 0000000..08c763c
--- /dev/null
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "queue",
+};
+
+void
+ena_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_port_t *port = rxq->port;
+  vnet_dev_t *dev = port->dev;
+
+  ASSERT (rxq->started == 0);
+  ASSERT (eq->cq_created == 0);
+  ASSERT (eq->sq_created == 0);
+
+  log_debug (dev, "queue %u", rxq->queue_id);
+
+  foreach_pointer (p, eq->buffer_indices, eq->compl_sqe_indices)
+    if (p)
+      clib_mem_free (p);
+
+  foreach_pointer (p, eq->cqes, eq->sqes)
+    vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *port = rxq->port;
+  vnet_dev_t *dev = port->dev;
+  ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+  u16 size = rxq->size;
+  vnet_dev_rv_t rv;
+
+  ASSERT (eq->buffer_indices == 0);
+  ASSERT (eq->compl_sqe_indices == 0);
+  ASSERT (eq->cqes == 0);
+  ASSERT (eq->sqes == 0);
+
+  log_debug (dev, "queue %u", rxq->queue_id);
+
+  eq->buffer_indices = clib_mem_alloc_aligned (
+    sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+  eq->compl_sqe_indices = clib_mem_alloc_aligned (
+    sizeof (eq->compl_sqe_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+                                   (void **) &eq->cqes)))
+    goto err;
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+                                   (void **) &eq->sqes)))
+    goto err;
+
+  return VNET_DEV_OK;
+
+err:
+  ena_rx_queue_free (vm, rxq);
+  return rv;
+}
+
+void
+ena_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+  vnet_dev_port_t *port = txq->port;
+  vnet_dev_t *dev = port->dev;
+
+  ASSERT (txq->started == 0);
+
+  log_debug (dev, "queue %u", txq->queue_id);
+
+  foreach_pointer (p, eq->buffer_indices, eq->sqe_templates)
+    if (p)
+      clib_mem_free (p);
+
+  foreach_pointer (p, eq->cqes, eq->sqes)
+    vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_port_t *port = txq->port;
+  vnet_dev_t *dev = port->dev;
+  ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+  u16 size = txq->size;
+  vnet_dev_rv_t rv;
+
+  ASSERT (eq->buffer_indices == 0);
+  ASSERT (eq->sqe_templates == 0);
+  ASSERT (eq->cqes == 0);
+  ASSERT (eq->sqes == 0);
+
+  log_debug (dev, "queue %u", txq->queue_id);
+
+  eq->buffer_indices = clib_mem_alloc_aligned (
+    sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+  eq->sqe_templates = clib_mem_alloc_aligned (
+    sizeof (eq->sqe_templates[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+                                   (void **) &eq->cqes)))
+    goto err;
+
+  if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+                                   (void **) &eq->sqes)))
+    goto err;
+
+  return VNET_DEV_OK;
+
+err:
+  ena_tx_queue_free (vm, txq);
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_port_t *port = rxq->port;
+  vnet_dev_t *dev = port->dev;
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u16 buffer_size = vnet_dev_get_rx_queue_buffer_data_size (vm, rxq);
+  u16 size = rxq->size;
+  vnet_dev_rv_t rv;
+
+  /* Create Completion Queue */
+  ena_aq_create_cq_resp_t cqresp;
+  ena_aq_create_cq_cmd_t cqcmd = {
+    .interrupt_mode_enabled = 1,
+    .cq_entry_size_words = sizeof (ena_rx_cdesc_t) / 4,
+    .cq_depth = size,
+    .msix_vector = ~0,
+  };
+
+  ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+  if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+    {
+      log_err (dev, "queue %u cq creation failed", rxq->queue_id);
+      goto error;
+    }
+
+  eq->cq_idx = cqresp.cq_idx;
+  eq->cq_created = 1;
+
+  log_debug (dev, "queue %u cq %u created", rxq->queue_id, eq->cq_idx);
+
+  /* Create Submission Queue */
+  ena_aq_create_sq_resp_t sqresp;
+  ena_aq_create_sq_cmd_t sqcmd = {
+    .sq_direction = ENA_ADMIN_SQ_DIRECTION_RX,
+    .placement_policy = ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+    .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+    .is_physically_contiguous = 1,
+    .sq_depth = size,
+    .cq_idx = cqresp.cq_idx,
+  };
+
+  ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+  if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+    {
+      log_err (dev, "queue %u sq creation failed", rxq->queue_id);
+      goto error;
+    }
+
+  eq->sq_idx = sqresp.sq_idx;
+  eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+  eq->sq_created = 1;
+
+  log_debug (dev, "queue %u sq %u created, sq_db %p", rxq->queue_id,
+            eq->sq_idx, eq->sq_db);
+
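+  /* prefill the whole SQ; req_id in each SQE is the slot index, used to
+     match completions back to buffer_indices[] */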
+  for (int i = 0; i < size; i++)
+    {
+      eq->sqes[i] = (ena_rx_desc_t){
+       .lo = {
+          .length = buffer_size,
+          .comp_req = 1,
+          .first = 1,
+          .last = 1,
+          .reserved5 = 1, /* ena_com says MBO */
+          .req_id = i,
+        },
+      };
+      eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+      eq->compl_sqe_indices[i] = i;
+    }
+
+  eq->sq_next = 0;
+  eq->n_compl_sqes = size;
+
+  return VNET_DEV_OK;
+
+error:
+  ena_rx_queue_stop (vm, rxq);
+  return rv;
+}
+
+vnet_dev_rv_t
+ena_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+  vnet_dev_port_t *port = txq->port;
+  vnet_dev_t *dev = port->dev;
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u16 size = txq->size;
+  vnet_dev_rv_t rv;
+
+  /* Create Completion Queue */
+  ena_aq_create_cq_resp_t cqresp;
+  ena_aq_create_cq_cmd_t cqcmd = {
+    .interrupt_mode_enabled = 1,
+    .cq_entry_size_words = sizeof (ena_tx_cdesc_t) / 4,
+    .cq_depth = size,
+    .msix_vector = ~0,
+  };
+
+  ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+  if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+    {
+      log_err (dev, "queue %u cq creation failed", txq->queue_id);
+      goto error;
+    }
+
+  eq->cq_idx = cqresp.cq_idx;
+  eq->cq_created = 1;
+
+  log_debug (dev, "queue %u cq %u created", txq->queue_id, eq->cq_idx);
+
+  /* Create Submission Queue */
+  ena_aq_create_sq_resp_t sqresp;
+  ena_aq_create_sq_cmd_t sqcmd = {
+    .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+    .placement_policy = eq->llq ? ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE :
+                                       ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+    .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+    .is_physically_contiguous = 1,
+    .sq_depth = size,
+    .cq_idx = cqresp.cq_idx,
+  };
+
+  if (eq->llq == 0)
+    ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+  if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+    {
+      log_err (dev, "queue %u sq creation failed", txq->queue_id);
+      goto error;
+    }
+
+  eq->sq_idx = sqresp.sq_idx;
+  eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+  eq->sq_created = 1;
+
+  log_debug (dev, "queue %u sq %u created, sq_db %p", txq->queue_id,
+            eq->sq_idx, eq->sq_db);
+
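+  /* precompute the first 8 bytes of each SQE; the req_id is split across
+     the 10-bit req_id_lo and 6-bit req_id_hi fields */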
+  for (u32 i = 0; i < size; i++)
+    {
+      eq->sqe_templates[i] =
+       (ena_tx_desc_t){ .req_id_lo = i, .req_id_hi = i >> 10, .comp_req = 1 }
+         .as_u64x2[0];
+
+      eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+    }
+
+  eq->sq_head = 0;
+  eq->sq_tail = 0;
+  eq->cq_next = 0;
+
+#if 0
+  /* mapping of the LLQ descriptor area in the device memory BAR,
+     currently disabled */
+  if (eq->llq)
+    eq->llq_descs =
+      (ena_tx_llq_desc128_t *) ((u8 *) ed->mem_bar +
+                               sqresp.llq_descriptors_offset);
+#endif
+
+  log_debug (dev, "queue %u llq_desc %p", txq->queue_id,
+            eq->llq ? eq->llq_descs : 0);
+  return VNET_DEV_OK;
+
+error:
+  ena_tx_queue_stop (vm, txq);
+  return rv;
+}
+
+static void
+ena_free_sq_buffer_indices (vlib_main_t *vm, u32 *sq_buffer_indices,
+                           u32 n_desc)
+{
+  u32 *to = sq_buffer_indices;
+
+  for (u32 *from = to; from < sq_buffer_indices + n_desc; from++)
+    if (from[0] != VLIB_BUFFER_INVALID_INDEX)
+      to++[0] = from[0];
+
+  if (to - sq_buffer_indices > 0)
+    vlib_buffer_free (vm, sq_buffer_indices, to - sq_buffer_indices);
+}
+
+void
+ena_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_t *dev = rxq->port->dev;
+  vnet_dev_rv_t rv;
+
+  if (eq->sq_created)
+    {
+      ena_aq_destroy_sq_cmd_t cmd = {
+       .sq_idx = eq->sq_idx,
+       .sq_direction = ENA_ADMIN_SQ_DIRECTION_RX,
+      };
+
+      if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+       log_err (dev, "queue %u failed to destroy sq %u", rxq->queue_id,
+                eq->sq_idx);
+      eq->sq_created = 0;
+    };
+
+  if (eq->cq_created)
+    {
+      ena_aq_destroy_cq_cmd_t cmd = {
+       .cq_idx = eq->cq_idx,
+      };
+
+      if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+       log_err (dev, "queue %u failed to destroy cq %u", rxq->queue_id,
+                eq->cq_idx);
+      eq->cq_created = 0;
+    };
+
+  if (eq->n_compl_sqes < rxq->size)
+    ena_free_sq_buffer_indices (vm, eq->buffer_indices, rxq->size);
+}
+
+void
+ena_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+  vnet_dev_t *dev = txq->port->dev;
+  vnet_dev_rv_t rv;
+
+  if (eq->sq_created)
+    {
+      ena_aq_destroy_sq_cmd_t cmd = {
+       .sq_idx = eq->sq_idx,
+       .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+      };
+
+      if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+       log_err (dev, "queue %u failed to destroy sq %u", txq->queue_id,
+                eq->sq_idx);
+      eq->sq_created = 0;
+    };
+
+  if (eq->cq_created)
+    {
+      ena_aq_destroy_cq_cmd_t cmd = {
+       .cq_idx = eq->cq_idx,
+      };
+
+      if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+       log_err (dev, "queue %u failed to destroy cq %u", txq->queue_id,
+                eq->cq_idx);
+      eq->cq_created = 0;
+    };
+
+  if (eq->sq_head != eq->sq_tail)
+    ena_free_sq_buffer_indices (vm, eq->buffer_indices, txq->size);
+}
diff --git a/src/plugins/dev_ena/reg.c b/src/plugins/dev_ena/reg.c
new file mode 100644 (file)
index 0000000..7f2cc0f
--- /dev/null
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+  .class_name = "ena",
+  .subclass_name = "reg",
+};
+
+static vnet_dev_rv_t
+ena_err (vnet_dev_t *dev, vnet_dev_rv_t rv, char *fmt, ...)
+{
+  va_list va;
+  u8 *s;
+
+  va_start (va, fmt);
+  s = va_format (0, fmt, &va);
+  va_end (va);
+  log_err (dev, "%v", s);
+  vec_free (s);
+  return rv;
+}
+
+static u8 *
+format_ena_reg_name (u8 *s, va_list *args)
+{
+  int offset = va_arg (*args, int);
+
+  char *reg_names[] = {
+#define _(o, r, rn, m) [(o) >> 2] = #rn,
+    foreach_ena_reg
+#undef _
+  };
+
+  offset >>= 2;
+
+  if (offset < 0 || offset >= ARRAY_LEN (reg_names) || reg_names[offset] == 0)
+    return format (s, "(unknown)");
+  return format (s, "%s", reg_names[offset]);
+}
+
+void
+ena_reg_write (vnet_dev_t *dev, ena_reg_t reg, void *v)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  u32 *p = (u32 *) ((u8 *) ed->reg_bar + reg);
+  u32 val = *(u32 *) v;
+  log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x", __func__,
+            format_ena_reg_name, reg, reg, val);
+  __atomic_store_n (p, val, __ATOMIC_RELEASE);
+}
+
+void
+ena_reg_set_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, u32 rlo, u32 rhi,
+                     void *p)
+{
+  uword pa = vnet_dev_get_dma_addr (vm, dev, p);
+  u32 reg = (u32) pa;
+  ena_reg_write (dev, rlo, &reg);
+  reg = pa >> 32;
+  ena_reg_write (dev, rhi, &reg);
+}
+
+void
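+/* read a device register; in "readless" mode the value is obtained by
+   writing the register offset to MMIO_REG_READ and polling the DMA'd
+   ena_mmio_resp_t for up to 200 ms */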
+ena_reg_read (vnet_dev_t *dev, ena_reg_t reg, const void *v)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  vlib_main_t *vm = vlib_get_main ();
+  u32 rv;
+  f64 dt = 0, t0;
+
+  if (ed->readless == 0)
+    {
+      rv =
+       __atomic_load_n ((u32 *) ((u8 *) ed->reg_bar + reg), __ATOMIC_SEQ_CST);
+    }
+  else
+    {
+      u32 *p = (u32 *) ((u8 *) ed->reg_bar + ENA_REG_MMIO_REG_READ);
+
+      ena_reg_mmio_reg_read_t rr = { .reg_off = reg, .req_id = 1 };
+      ed->mmio_resp->req_id = 0;
+      ed->mmio_resp->reg_val = ~0;
+
+      __atomic_store_n (p, rr.as_u32, __ATOMIC_RELEASE);
+
+      t0 = vlib_time_now (vm);
+      while (ed->mmio_resp->req_id == 0 && dt < 0.2)
+       {
+         CLIB_PAUSE ();
+         dt = vlib_time_now (vm) - t0;
+       }
+
+      rv = ed->mmio_resp->reg_val;
+    }
+
+  log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x dt %.3fs", __func__,
+            format_ena_reg_name, reg, reg, rv, dt);
+  *(u32 *) v = rv;
+}
+
+vnet_dev_rv_t
+ena_reg_reset (vlib_main_t *vm, vnet_dev_t *dev, ena_reset_reason_t reason)
+{
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  ena_reg_version_t ver;
+  ena_reg_controller_version_t ctrl_ver;
+  ena_reg_caps_t caps = {};
+  ena_reg_dev_sts_t dev_sts = {};
+  ena_reg_dev_ctl_t reset_start = { .dev_reset = 1, .reset_reason = reason };
+
+  if (ed->readless)
+    ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+                         ed->mmio_resp);
+
+  ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+  ena_reg_read (dev, ENA_REG_CAPS, &caps);
+
+  if (caps.as_u32 == ~0 && dev_sts.as_u32 == ~0)
+    return ena_err (dev, VNET_DEV_ERR_BUS, "failed to read regs");
+
+  if (dev_sts.ready == 0)
+    return VNET_DEV_ERR_NOT_READY;
+
+  log_debug (dev, "reg_reset: reset timeout is %u", caps.reset_timeout);
+
+  ena_reg_write (dev, ENA_REG_DEV_CTL, &reset_start);
+
+  if (ed->readless)
+    ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+                         ed->mmio_resp);
+
+  for (int i = 0; 1; i++)
+    {
+      ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+      if (dev_sts.reset_in_progress)
+       break;
+      if (i == 20)
+       return ena_err (dev, VNET_DEV_ERR_BUS, "failed to initiate reset");
+      vlib_process_suspend (vm, 0.001);
+    }
+
+  ena_reg_write (dev, ENA_REG_DEV_CTL, &(ena_reg_dev_ctl_t){});
+
+  for (int i = 0; 1; i++)
+    {
+      ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+      if (dev_sts.reset_in_progress == 0)
+       break;
+      if (i == 20)
+       return ena_err (dev, VNET_DEV_ERR_BUS, "failed to complete reset");
+      vlib_process_suspend (vm, 0.001);
+    }
+
+  ena_reg_read (dev, ENA_REG_VERSION, &ver);
+  ena_reg_read (dev, ENA_REG_CONTROLLER_VERSION, &ctrl_ver);
+
+  log_info (dev, "version %u.%u controller_version %u.%u.%u impl_id %u",
+           ver.major, ver.minor, ctrl_ver.major, ctrl_ver.minor,
+           ctrl_ver.subminor, ctrl_ver.impl_id);
+
+  return 0;
+}
diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c
new file mode 100644 (file)
index 0000000..41fc5b8
--- /dev/null
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/mask_compare.h>
+#include <vppinfra/vector/compress.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_RX_REFILL_BATCH 32
+
+typedef struct
+{
+  u16 phase_bit;
+  u16 size;
+  u32 mask;
+  ena_rx_cdesc_status_t st_or;
+  ena_rx_cdesc_status_t st_and;
+  u16 *comp_sqe_indices;
+  u32 *sq_buffer_indices;
+} ena_rx_ctx_t;
+
+static_always_inline void
+ena_device_input_status_to_flags (ena_rx_cdesc_status_t *statuses, u32 *flags,
+                                 u32 n_desc, vlib_frame_bitmap_t first_bmp,
+                                 int maybe_chained)
+{
+  const ena_rx_cdesc_status_t mask_first = { .first = 1 },
+                             match_first1 = { .first = 1 };
+
+  const ena_rx_cdesc_status_t mask_last = { .last = 1 },
+                             match_last0 = { .last = 0 };
+
+  const ena_rx_cdesc_status_t mask_l4_csum = { .ipv4_frag = 1,
+                                              .l4_csum_checked = 1,
+                                              .l4_csum_err = 1 },
+                             match_l4_csum_ok = { .l4_csum_checked = 1 };
+
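+  /* each (mask, match) pair tests a group of status bits for all
+     descriptors at once; 8 zeroed entries appended past n_desc below make
+     the over-read in the vector loop harmless */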
+  clib_memset_u32 (statuses + n_desc, 0, 8);
+#if defined(CLIB_HAVE_VEC128)
+
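+/* the 'CxLIB' spelling below keeps the 512-bit and 256-bit variants
+   disabled, so only the 128-bit path is currently compiled */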
+#if defined(CxLIB_HAVE_VEC512)
+#define N          16
+#define u32xN      u32x16
+#define u32xNu     u32x16u
+#define u32xN_splat u32x16_splat
+#elif defined(CxLIB_HAVE_VEC256)
+#define N          8
+#define u32xN      u32x8
+#define u32xNu     u32x8u
+#define u32xN_splat u32x8_splat
+#else
+#define N          4
+#define u32xN      u32x4
+#define u32xNu     u32x4u
+#define u32xN_splat u32x4_splat
+#endif
+
+  const u32xN st_mask_first = u32xN_splat (mask_first.as_u32);
+  const u32xN st_match_first1 = u32xN_splat (match_first1.as_u32);
+  const u32xN st_mask_last = u32xN_splat (mask_last.as_u32);
+  const u32xN st_match_last0 = u32xN_splat (match_last0.as_u32);
+  const u32xN st_mask_l4_csum = u32xN_splat (mask_l4_csum.as_u32);
+  const u32xN st_match_l4_csum_ok = u32xN_splat (match_l4_csum_ok.as_u32);
+  const u32xN f_total_len_valid = u32xN_splat (VLIB_BUFFER_TOTAL_LENGTH_VALID);
+  const u32xN f_next_preset = u32xN_splat (VLIB_BUFFER_NEXT_PRESENT);
+  const u32xN f_l4_csum = u32xN_splat (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
+                                      VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+
+  for (u32 i = 0; i < round_pow2 (n_desc, 2 * N); i += 2 * N)
+    {
+      uword msk = 0;
+      u32xN f0, f1, r0, r1;
+      u32xN s0 = ((u32xNu *) (statuses + i))[0];
+      u32xN s1 = ((u32xNu *) (statuses + i))[1];
+
+      r0 = (s0 & st_mask_first) == st_match_first1;
+      r1 = (s1 & st_mask_first) == st_match_first1;
+      f0 = r0 & f_total_len_valid;
+      f1 = r1 & f_total_len_valid;
+
+      if (maybe_chained)
+       {
+#if defined(CxLIB_HAVE_VEC512)
+         u64 msb_mask = 0x1111111111111111;
+         msk = bit_extract_u64 (u8x64_msb_mask ((u8x64) r0), msb_mask);
+         msk |= bit_extract_u64 (u8x64_msb_mask ((u8x64) r1), msb_mask) << 16;
+#elif defined(CxLIB_HAVE_VEC256)
+         msk = u8x32_msb_mask ((u8x32) r0);
+         msk |= (u64) u8x32_msb_mask ((u8x32) r1) << 32;
+         msk = bit_extract_u64 (msk, 0x1111111111111111);
+#else
+         msk = u8x16_msb_mask ((u8x16) r0);
+         msk |= (u32) u8x16_msb_mask ((u8x16) r1) << 16;
+         msk = bit_extract_u32 (msk, 0x11111111);
+#endif
+         first_bmp[i / uword_bits] |= msk << (i % uword_bits);
+       }
+
+      f0 |= ((s0 & st_mask_last) == st_match_last0) & f_next_preset;
+      f1 |= ((s1 & st_mask_last) == st_match_last0) & f_next_preset;
+
+      f0 |= ((s0 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+      f1 |= ((s1 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+
+      ((u32xNu *) (flags + i))[0] = f0;
+      ((u32xNu *) (flags + i))[1] = f1;
+    }
+#else
+  for (u32 i = 0; i < n_desc; i++)
+    {
+      u32 f = 0;
+      ena_rx_cdesc_status_t st = statuses[i];
+
+      if ((st.as_u32 & mask_first.as_u32) == match_first1.as_u32)
+       {
+         f |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+         if (maybe_chained)
+           first_bmp[i / uword_bits] |= (uword) 1 << (i % uword_bits);
+       }
+
+      if ((st.as_u32 & mask_last.as_u32) == match_last0.as_u32)
+       f |= VLIB_BUFFER_NEXT_PRESENT;
+
+      if ((st.as_u32 & mask_l4_csum.as_u32) == match_l4_csum_ok.as_u32)
+       f |= VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+            VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
+
+      flags[i] = f;
+    }
+#endif
+}
+
+static_always_inline u16
+ena_device_input_cq_dequeue_no_wrap (ena_rx_ctx_t *ctx, ena_rxq_t *q,
+                                    ena_rx_cdesc_status_t *statuses,
+                                    u16 *lengths, u16 *csi)
+{
+  u32 next = q->cq_next;
+  ena_rx_cdesc_t *cqes = q->cqes;
+  /* a CQE is fresh when its phase bit differs from the expected stale
+     phase, which flips on each wrap of the CQ */
+  u32 phase = (next & ctx->size) != 0;
+  u16 index = next & ctx->mask;
+  ena_rx_cdesc_t *cd = cqes + index;
+  ena_rx_cdesc_status_t st;
+  u32 n_to_check, i = 0;
+
+  st = cd->status;
+  if (st.phase == phase)
+    return 0;
+
+  n_to_check = clib_min (VLIB_FRAME_SIZE, ctx->size - index);
+
+  ctx->st_or.as_u32 |= st.as_u32;
+  ctx->st_and.as_u32 &= st.as_u32;
+  statuses[i] = st;
+  lengths[i] = cd->length;
+  csi[i] = cd->req_id;
+  i++;
+  cd++;
+
+more:
+  for (st = cd->status; i < n_to_check && st.phase != phase;
+       i++, st = (++cd)->status)
+    {
+      ctx->st_or.as_u32 |= st.as_u32;
+      ctx->st_and.as_u32 &= st.as_u32;
+      statuses[i] = st;
+      lengths[i] = cd->length;
+      csi[i] = cd->req_id;
+    }
+
+  if (i == n_to_check)
+    {
+      n_to_check = VLIB_FRAME_SIZE - n_to_check;
+      if (n_to_check)
+       {
+         phase ^= 1;
+         cd = cqes;
+         goto more;
+       }
+    }
+
+  /* roll back trailing CQEs which do not complete a packet (last unset) */
+  if (PREDICT_FALSE (statuses[i - 1].last == 0))
+    {
+      i--;
+      while (i && statuses[i - 1].last == 0)
+       i--;
+    }
+
+  return i;
+}
+
+static_always_inline void
+ena_device_input_refill (vlib_main_t *vm, ena_rx_ctx_t *ctx,
+                        vnet_dev_rx_queue_t *rxq, int use_va)
+{
+  ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+  const u64x2 flip_phase = (ena_rx_desc_t){ .lo.phase = 1 }.as_u64x2;
+  u32 buffer_indices[ENA_RX_REFILL_BATCH];
+  uword dma_addr[ENA_RX_REFILL_BATCH];
+  u32 n_alloc, n_compl_sqes = q->n_compl_sqes;
+  u16 *csi = ctx->comp_sqe_indices;
+  ena_rx_desc_t *sqes = q->sqes;
+
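+  /* reuse the SQE already in the ring for each completed slot: the XOR
+     flips only the phase bit, then the buffer address is patched in */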
+  while (n_compl_sqes > 0)
+    {
+      n_alloc = vlib_buffer_alloc_from_pool (
+       vm, buffer_indices, clib_min (ENA_RX_REFILL_BATCH, n_compl_sqes),
+       vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+      if (PREDICT_FALSE (n_alloc == 0))
+       break;
+
+      vlib_get_buffers_with_offset (vm, buffer_indices, (void **) dma_addr,
+                                   n_alloc,
+                                   STRUCT_OFFSET_OF (vlib_buffer_t, data));
+
+      if (!use_va)
+       for (u32 i = 0; i < n_alloc; i++)
+         dma_addr[i] = vlib_physmem_get_pa (vm, (void *) dma_addr[i]);
+
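+      /* rewrite each completed SQE slot in place: the XOR toggles the
+       * descriptor phase bit for the new ring pass, the 2nd u64 takes the
+       * fresh buffer DMA address */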
+      for (u32 i = 0; i < n_alloc; i++)
+       {
+         u16 slot = csi[i];
+         u64x2 r = sqes[slot].as_u64x2 ^ flip_phase;
+         ctx->sq_buffer_indices[slot] = buffer_indices[i];
+         r[1] = dma_addr[i];
+         sqes[slot].as_u64x2 = r; /* write SQE as single 16-byte store */
+       }
+
+      csi += n_alloc;
+      n_compl_sqes -= n_alloc;
+    }
+
+  if (n_compl_sqes == q->n_compl_sqes)
+    return;
+
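+  /* release semantics ensure all SQE stores above are visible to the
+   * device before the new tail is written to the doorbell */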
+  q->sq_next += q->n_compl_sqes - n_compl_sqes;
+  __atomic_store_n (q->sq_db, q->sq_next, __ATOMIC_RELEASE);
+
+  if (PREDICT_FALSE (n_compl_sqes))
+    clib_memmove (ctx->comp_sqe_indices, csi, n_compl_sqes * sizeof (csi[0]));
+
+  q->n_compl_sqes = n_compl_sqes;
+}
+
+static_always_inline uword
+ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                        vnet_dev_rx_queue_t *rxq)
+{
+  ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_port_t *port = rxq->port;
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b;
+  ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8];
+  u16 lengths[VLIB_FRAME_SIZE + 8], *l;
+  u32 flags[VLIB_FRAME_SIZE + 8], *f;
+  u16 *csi;
+  uword n_rx_packets = 0, n_rx_bytes = 0;
+  vlib_frame_bitmap_t head_bmp = {};
+  u32 sw_if_index = port->intf.sw_if_index;
+  u32 hw_if_index = port->intf.hw_if_index;
+  u32 n_trace, n_deq, n_left;
+  u32 cq_next = q->cq_next;
+  u32 next_index = rxq->next_index;
+  vlib_frame_t *next_frame;
+  vlib_buffer_template_t bt = rxq->buffer_template;
+  u32 *bi;
+  int maybe_chained;
+
+  ASSERT (count_set_bits (rxq->size) == 1);
+  ena_rx_ctx_t ctx = {
+    .size = rxq->size,
+    .mask = rxq->size - 1,
+    .st_and.as_u32 = ~0,
+    .comp_sqe_indices = q->compl_sqe_indices,
+    .sq_buffer_indices = q->buffer_indices,
+  };
+
+  /* we may have completed SQE indices from previous run */
+  csi = ctx.comp_sqe_indices + q->n_compl_sqes;
+
+  n_deq =
+    ena_device_input_cq_dequeue_no_wrap (&ctx, q, statuses, lengths, csi);
+
+  if (n_deq == 0)
+    goto refill;
+
+  q->n_compl_sqes += n_deq;
+
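+  /* st_and holds the bitwise AND of all dequeued statuses; if every
+   * descriptor had both 'first' and 'last' set there are no multi-buffer
+   * chains and the cheaper code path can be taken */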
+  maybe_chained = ctx.st_and.first && ctx.st_and.last ? 0 : 1;
+
+  next_frame =
+    vlib_get_next_frame_internal (vm, node, next_index, /* new frame */ 1);
+  bi = vlib_frame_vector_args (next_frame);
+
+  /* move buffer indices from the ring */
+  for (u32 i = 0; i < n_deq; i++)
+    {
+      u32 slot = csi[i];
+      bi[i] = ctx.sq_buffer_indices[slot];
+      ctx.sq_buffer_indices[slot] = VLIB_BUFFER_INVALID_INDEX;
+    }
+
+  vlib_get_buffers (vm, bi, buffers, n_deq);
+
+  if (PREDICT_FALSE (maybe_chained))
+    ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 1);
+  else
+    ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 0);
+
+  for (b = buffers, l = lengths, f = flags, n_left = n_deq; n_left >= 8;
+       b += 4, f += 4, l += 4, n_left -= 4)
+    {
+      clib_prefetch_store (b[4]);
+      clib_prefetch_store (b[5]);
+      clib_prefetch_store (b[6]);
+      clib_prefetch_store (b[7]);
+      b[0]->template = bt;
+      n_rx_bytes += b[0]->current_length = l[0];
+      b[0]->flags = f[0];
+      b[1]->template = bt;
+      n_rx_bytes += b[1]->current_length = l[1];
+      b[1]->flags = f[1];
+      b[2]->template = bt;
+      n_rx_bytes += b[2]->current_length = l[2];
+      b[2]->flags = f[2];
+      b[3]->template = bt;
+      n_rx_bytes += b[3]->current_length = l[3];
+      b[3]->flags = f[3];
+    }
+
+  for (; n_left > 0; b += 1, f += 1, l += 1, n_left -= 1)
+    {
+      b[0]->template = bt;
+      n_rx_bytes += b[0]->current_length = l[0];
+      b[0]->flags = f[0];
+    }
+
+  if (maybe_chained)
+    {
+      vlib_buffer_t *hb = 0;
+      vlib_frame_bitmap_t tail_buf_bmp = {};
+      u32 i, total_len = 0, head_flags = 0, tail_flags = 0;
+      n_rx_packets = vlib_frame_bitmap_count_set_bits (head_bmp);
+
+      vlib_frame_bitmap_init (tail_buf_bmp, n_deq);
+      vlib_frame_bitmap_xor (tail_buf_bmp, head_bmp);
+
+      foreach_vlib_frame_bitmap_set_bit_index (i, tail_buf_bmp)
+       {
+         vlib_buffer_t *pb = buffers[i - 1];
+         /* only store operations here */
+         pb->next_buffer = bi[i];
+         if (vlib_frame_bitmap_is_bit_set (tail_buf_bmp, i - 1) == 0)
+           {
+             if (hb)
+               {
+                 hb->total_length_not_including_first_buffer = total_len;
+                 /* tail descriptor contains protocol info so we need to
+                  * combine head and tail buffer flags */
+                 hb->flags = head_flags | tail_flags;
+               }
+             head_flags = flags[i - 1];
+             total_len = 0;
+             hb = pb;
+           }
+         total_len += lengths[i];
+         tail_flags = flags[i];
+       }
+
+      hb->total_length_not_including_first_buffer = total_len;
+      hb->flags = head_flags | tail_flags;
+    }
+  else
+    n_rx_packets = n_deq;
+
+  /* packet tracing */
+  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+    {
+      u32 i;
+      if (!maybe_chained)
+       vlib_frame_bitmap_init (head_bmp, n_deq);
+      foreach_vlib_frame_bitmap_set_bit_index (i, head_bmp)
+       {
+         vlib_buffer_t *b = buffers[i];
+         if (vlib_trace_buffer (vm, node, next_index, b, 0))
+           {
+             u32 j = i;
+             ena_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+             tr->next_index = next_index;
+             tr->qid = rxq->queue_id;
+             tr->hw_if_index = hw_if_index;
+             tr->n_desc = 1;
+             tr->length = lengths[i];
+             tr->req_id = csi[i];
+             tr->status = statuses[i];
+             while (statuses[j].last == 0)
+               {
+                 j++;
+                 tr->n_desc++;
+                 tr->length += lengths[j];
+               }
+             tr->status = statuses[j];
+
+             if (--n_trace == 0)
+               goto trace_done;
+           }
+       }
+    trace_done:
+      vlib_set_trace_count (vm, node, n_trace);
+    }
+
+  if (PREDICT_FALSE (maybe_chained))
+    clib_compress_u32 (bi, bi, head_bmp, n_deq);
+
+  if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+    {
+      ethernet_input_frame_t *ef;
+      next_frame->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+      ef = vlib_frame_scalar_args (next_frame);
+      ef->sw_if_index = sw_if_index;
+      ef->hw_if_index = hw_if_index;
+
+      if (ctx.st_or.l3_csum_err == 0)
+       next_frame->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+      vlib_frame_no_append (next_frame);
+    }
+
+  vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_rx_packets);
+
+  vlib_increment_combined_counter (
+    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+    vm->thread_index, hw_if_index, n_rx_packets, n_rx_bytes);
+
+  q->cq_next = cq_next + n_deq;
+
+refill:
+  if (rxq->port->dev->va_dma)
+    ena_device_input_refill (vm, &ctx, rxq, 1);
+  else
+    ena_device_input_refill (vm, &ctx, rxq, 0);
+
+  return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (ena_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 n_rx = 0;
+  foreach_vnet_dev_rx_queue_runtime (rxq, node)
+    n_rx += ena_device_input_inline (vm, node, rxq);
+  return n_rx;
+}
diff --git a/src/plugins/dev_ena/tx_node.c b/src/plugins/dev_ena/tx_node.c
new file mode 100644 (file)
index 0000000..ae1b852
--- /dev/null
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_TX_ENQ_BATCH_SZ   64
+#define ENA_MAX_LOG2_TXQ_SIZE 11
+#define ENA_TX_MAX_TAIL_LEN   5
+
+typedef struct
+{
+  u32 n_bytes;
+  ena_device_t *ed;
+  u16 n_desc;
+  u32 mask;
+  u16 n_packets_left;
+  u16 n_free_slots;
+  u32 *from;
+  u32 *sq_buffer_indices;
+  u32 tmp_bi[VLIB_FRAME_SIZE];
+  ena_tx_desc_t *sqes;
+  u64 *sqe_templates;
+  u16 n_dropped_chain_too_long;
+  u8 llq;
+  void *bd;
+} ena_tx_ctx_t;
+
+/* bits inside req_id which represent SQE index */
+static const u16 reqid_sqe_idx_mask = (1U << ENA_MAX_LOG2_TXQ_SIZE) - 1;
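+/* e.g. with ENA_MAX_LOG2_TXQ_SIZE of 11, a 3-descriptor chain starting at
+ * SQE slot 5 is reported back by the device as req_id = (3 << 11) | 5 */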
+
+static_always_inline void
+ena_txq_adv_sq_tail (ena_tx_ctx_t *ctx, ena_txq_t *eq)
+{
+  /* CQEs can arrive out of order, so we cannot blindly advance SQ tail for
+   * number of free slots, instead we need to check if slot contains invalid
+   * buffer index */
+
+  u32 sq_head = eq->sq_head;
+  u32 sq_tail = eq->sq_tail;
+  u16 n, offset = sq_tail & ctx->mask;
+  u32 *bi = ctx->sq_buffer_indices + offset;
+  u16 n_to_check = clib_min (sq_head - sq_tail, ctx->n_desc - offset);
+
+advance_sq_tail:
+  n = n_to_check;
+
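+  /* scan forward while slots still hold VLIB_BUFFER_INVALID_INDEX (their
+   * completions were already seen), 8 or 4 slots at a time when vector
+   * units are available */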
+#ifdef CLIB_HAVE_VEC256
+  for (; n >= 8; n -= 8, bi += 8)
+    if (!u32x8_is_all_equal (*(u32x8u *) bi, VLIB_BUFFER_INVALID_INDEX))
+      break;
+#elif defined(CLIB_HAVE_VEC128)
+  for (; n >= 4; n -= 4, bi += 4)
+    if (!u32x4_is_all_equal (*(u32x4u *) bi, VLIB_BUFFER_INVALID_INDEX))
+      break;
+#endif
+
+  for (; n > 0; n -= 1, bi += 1)
+    if (bi[0] != VLIB_BUFFER_INVALID_INDEX)
+      break;
+
+  sq_tail += n_to_check - n;
+
+  if (n == 0 && sq_tail < sq_head)
+    {
+      n_to_check = sq_head - sq_tail;
+      bi = ctx->sq_buffer_indices;
+      goto advance_sq_tail;
+    }
+
+  eq->sq_tail = sq_tail;
+}
+
+static_always_inline void
+ena_txq_deq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq)
+{
+  /* dequeue CQ, extract SQ slot and number of chained buffers from
+   * req_id, move completed buffer indices to temp array */
+  const ena_tx_cdesc_t mask_phase = { .phase = 1 };
+  ena_tx_cdesc_t *cqes = txq->cqes, *cd, match_phase = {};
+  u32 cq_next = txq->cq_next;
+  u32 offset, n = 0;
+  u32 n_to_check;
+  u32 *buffers_to_free = ctx->tmp_bi;
+  u32 n_buffers_to_free = 0;
+
+  offset = cq_next & ctx->mask;
+  cd = cqes + offset;
+  n_to_check = ctx->n_desc - offset;
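+  /* the device writes phase 1 on the first pass over the ring and toggles
+   * it on every wrap, so the expected phase for fresh CQEs follows the
+   * ctx->n_desc bit of cq_next */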
+  match_phase.phase = (cq_next & ctx->n_desc) == 0;
+
+#ifdef CLIB_HAVE_VEC256
+  const u16 reqid_nic1 = 1U << ENA_MAX_LOG2_TXQ_SIZE;
+  const ena_tx_cdesc_t mask_reqid = { .req_id = reqid_sqe_idx_mask },
+                      match_ph0_nic1 = { .req_id = reqid_nic1, .phase = 0 },
+                      match_ph1_nic1 = { .req_id = reqid_nic1, .phase = 1 },
+                      mask_ph_nic = { .req_id = ~reqid_sqe_idx_mask,
+                                      .phase = 1 };
+  /* both phase and req_id are in lower 32 bits */
+  u32x8 mask_ph_nic_x8 = u32x8_splat (mask_ph_nic.as_u64);
+  u32x8 mask_reqid_x8 = u32x8_splat (mask_reqid.as_u64);
+  u32x8 match_ph_nic1_x8 = u32x8_splat (
+    match_phase.phase ? match_ph1_nic1.as_u64 : match_ph0_nic1.as_u64);
+  u32x8 buf_inv_idx_x8 = u32x8_splat (VLIB_BUFFER_INVALID_INDEX);
+#endif
+
+more:
+  while (n < n_to_check)
+    {
+      u16 req_id, n_in_chain;
+
+#ifdef CLIB_HAVE_VEC256
+      while (n + 7 < n_to_check)
+       {
+         u32x8 r, v;
+
+         /* load lower 32-bits of 8 CQEs in 256-bit register */
+         r = u32x8_shuffle2 (*(u32x8u *) cd, *(u32x8u *) (cd + 4), 0, 2, 4, 6,
+                             8, 10, 12, 14);
+
+         /* check if all 8 CQEs are completed and there are no chained bufs */
+         if (u32x8_is_equal (r & mask_ph_nic_x8, match_ph_nic1_x8) == 0)
+           goto one_by_one;
+
+         r &= mask_reqid_x8;
+
+         /* take consumed buffer indices from ring */
+         v = u32x8_gather_u32 (ctx->sq_buffer_indices, r,
+                               sizeof (ctx->sq_buffer_indices[0]));
+         u32x8_scatter_u32 (ctx->sq_buffer_indices, r, buf_inv_idx_x8,
+                            sizeof (ctx->sq_buffer_indices[0]));
+         *(u32x8u *) (buffers_to_free + n_buffers_to_free) = v;
+         n_buffers_to_free += 8;
+
+         n += 8;
+         cd += 8;
+         continue;
+       }
+    one_by_one:
+#endif
+
+      if ((cd->as_u64 & mask_phase.as_u64) != match_phase.as_u64)
+       goto done;
+
+      req_id = cd->req_id;
+      n_in_chain = req_id >> ENA_MAX_LOG2_TXQ_SIZE;
+      req_id &= reqid_sqe_idx_mask;
+
+      buffers_to_free[n_buffers_to_free++] = ctx->sq_buffer_indices[req_id];
+      ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+
+      if (PREDICT_FALSE (n_in_chain > 1))
+       while (n_in_chain-- > 1)
+         {
+           req_id = (req_id + 1) & ctx->mask;
+           buffers_to_free[n_buffers_to_free++] =
+             ctx->sq_buffer_indices[req_id];
+           ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+         }
+
+      n++;
+      cd++;
+    }
+
+  if (PREDICT_FALSE (n == n_to_check))
+    {
+      cq_next += n;
+      n = 0;
+      cd = cqes;
+      match_phase.phase ^= 1;
+#ifdef CLIB_HAVE_VEC256
+      match_ph_nic1_x8 ^= u32x8_splat (mask_phase.as_u64);
+#endif
+      n_to_check = ctx->n_desc;
+      goto more;
+    }
+
+done:
+
+  if (n_buffers_to_free)
+    {
+      cq_next += n;
+
+      /* part two - free buffers stored in temporary array */
+      vlib_buffer_free_no_next (vm, buffers_to_free, n_buffers_to_free);
+      txq->cq_next = cq_next;
+
+      ena_txq_adv_sq_tail (ctx, txq);
+    }
+}
+
+static_always_inline u16
+ena_txq_wr_sqe (vlib_main_t *vm, vlib_buffer_t *b, int use_iova,
+               ena_tx_desc_t *dp, u32 n_in_chain, ena_tx_desc_t desc)
+{
+  uword dma_addr = use_iova ? vlib_buffer_get_current_va (b) :
+                                   vlib_buffer_get_current_pa (vm, b);
+  u16 len = b->current_length;
+
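+  /* only the chain length is encoded here; the phase bit and, by
+   * construction, the SQE-index part of req_id come from the per-slot
+   * template applied later in ena_txq_copy_sqes () */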
+  desc.req_id_hi = n_in_chain << (ENA_MAX_LOG2_TXQ_SIZE - 10);
+  desc.as_u16x8[0] = len;
+  ASSERT (dma_addr < 0xffffffffffff); /* > 48bit - should never happen */
+  desc.as_u64x2[1] = dma_addr;       /* this also overwrites header_length */
+
+  /* write descriptor as single 128-bit store */
+  dp->as_u64x2 = desc.as_u64x2;
+  return len;
+}
+
+static_always_inline void
+ena_txq_copy_sqes (ena_tx_ctx_t *ctx, u32 off, ena_tx_desc_t *s, u32 n_desc)
+{
+  const u64 temp_phase_xor = (ena_tx_desc_t){ .phase = 1 }.as_u64x2[0];
+  u32 n = 0;
+
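+  /* sqe_templates hold the constant first u64 of each slot's descriptor;
+   * XOR-ing the phase bit on every use makes the written phase alternate
+   * between ring wraps, as the device expects */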
+  if (ctx->llq)
+    {
+      ena_tx_llq_desc128_t *llq_descs = (ena_tx_llq_desc128_t *) ctx->sqes;
+      for (; n < n_desc; n += 1, s += 1, off += 1)
+       {
+         ena_tx_llq_desc128_t td = {};
+         u64 t = ctx->sqe_templates[off];
+         u64x2 v = { t, 0 };
+         ctx->sqe_templates[off] = t ^ temp_phase_xor;
+         td.desc[0].as_u64x2 = v | s->as_u64x2;
+         td.desc[0].phase = 1;
+         td.desc[0].header_length = 96;
+         td.desc[0].length -= 96;
+         td.desc[0].buff_addr_lo += 96;
+         vlib_buffer_t *b =
+           vlib_get_buffer (vlib_get_main (), ctx->sq_buffer_indices[off]);
+         clib_memcpy_fast (td.data, vlib_buffer_get_current (b), 96);
+         fformat (stderr, "%U\n", format_hexdump_u32, &td, 32);
+         fformat (stderr, "%U\n", format_ena_tx_desc, &td);
+         clib_memcpy_fast (llq_descs + off, &td, 128);
+       }
+      return;
+    }
+
+#ifdef CLIB_HAVE_VEC512
+  u64x8 temp_phase_xor_x8 = u64x8_splat (temp_phase_xor);
+  for (; n + 7 < n_desc; n += 8, s += 8, off += 8)
+    {
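+      /* t8 holds 8 consecutive slot templates; the shuffles spread them
+       * into the low u64 of each 16-byte descriptor (odd lanes take zeros
+       * from the 2nd operand) before OR-ing with the staged SQEs */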
+      u64x8 t8 = *(u64x8u *) (ctx->sqe_templates + off);
+      *(u64x8u *) (ctx->sqe_templates + off) = t8 ^ temp_phase_xor_x8;
+      u64x8 r0 = *(u64x8u *) s;
+      u64x8 r1 = *(u64x8u *) (s + 4);
+      r0 |= u64x8_shuffle2 (t8, (u64x8){}, 0, 9, 1, 11, 2, 13, 3, 15);
+      r1 |= u64x8_shuffle2 (t8, (u64x8){}, 4, 9, 5, 11, 6, 13, 7, 15);
+      *((u64x8u *) (ctx->sqes + off)) = r0;
+      *((u64x8u *) (ctx->sqes + off + 4)) = r1;
+    }
+#elif defined(CLIB_HAVE_VEC256)
+  u64x4 temp_phase_xor_x4 = u64x4_splat (temp_phase_xor);
+  for (; n + 3 < n_desc; n += 4, s += 4, off += 4)
+    {
+      u64x4 t4 = *(u64x4u *) (ctx->sqe_templates + off);
+      *(u64x4u *) (ctx->sqe_templates + off) = t4 ^ temp_phase_xor_x4;
+      u64x4 r0 = *(u64x4u *) s;
+      u64x4 r1 = *(u64x4u *) (s + 2);
+      r0 |= u64x4_shuffle2 (t4, (u64x4){}, 0, 5, 1, 7);
+      r1 |= u64x4_shuffle2 (t4, (u64x4){}, 2, 5, 3, 7);
+      *((u64x4u *) (ctx->sqes + off)) = r0;
+      *((u64x4u *) (ctx->sqes + off + 2)) = r1;
+    }
+#endif
+
+  for (; n < n_desc; n += 1, s += 1, off += 1)
+    {
+      u64 t = ctx->sqe_templates[off];
+      u64x2 v = { t, 0 };
+      ctx->sqe_templates[off] = t ^ temp_phase_xor;
+      ctx->sqes[off].as_u64x2 = v | s->as_u64x2;
+    }
+}
+
+static_always_inline u32
+ena_txq_enq_one (vlib_main_t *vm, ena_tx_ctx_t *ctx, vlib_buffer_t *b0,
+                ena_tx_desc_t *d, u16 n_free_desc, u32 *f, int use_iova)
+{
+  const ena_tx_desc_t single = { .first = 1, .last = 1 };
+  vlib_buffer_t *b;
+  u32 i, n;
+
+  /* non-chained buffer */
+  if ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)
+    {
+      ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d, 1, single);
+      f[0] = ctx->from[0];
+      ctx->from += 1;
+      ctx->n_packets_left -= 1;
+      return 1;
+    }
+
+  /* count number of buffers in chain */
+  for (n = 1, b = b0; b->flags & VLIB_BUFFER_NEXT_PRESENT; n++)
+    b = vlib_get_buffer (vm, b->next_buffer);
+
+  /* if chain is too long, drop packet */
+  if (n > ENA_TX_MAX_TAIL_LEN + 1)
+    {
+      vlib_buffer_free_one (vm, ctx->from[0]);
+      ctx->from += 1;
+      ctx->n_packets_left -= 1;
+      ctx->n_dropped_chain_too_long++;
+      return 0;
+    }
+
+  /* not enough descriptors to accommodate the chain? */
+  if (n > n_free_desc)
+    return 0;
+
+  /* first */
+  f++[0] = ctx->from[0];
+  ctx->from += 1;
+  ctx->n_packets_left -= 1;
+  ctx->n_bytes +=
+    ena_txq_wr_sqe (vm, b0, use_iova, d++, n, (ena_tx_desc_t){ .first = 1 });
+
+  /* mid */
+  for (i = 1, b = b0; i < n - 1; i++)
+    {
+      f++[0] = b->next_buffer;
+      b = vlib_get_buffer (vm, b->next_buffer);
+      ctx->n_bytes +=
+       ena_txq_wr_sqe (vm, b, use_iova, d++, 0, (ena_tx_desc_t){});
+    }
+
+  /* last */
+  f[0] = b->next_buffer;
+  b = vlib_get_buffer (vm, b->next_buffer);
+  ctx->n_bytes +=
+    ena_txq_wr_sqe (vm, b, use_iova, d, 0, (ena_tx_desc_t){ .last = 1 });
+
+  return n;
+}
+
+static_always_inline uword
+ena_txq_enq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq, int use_iova)
+{
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  u32 *f = ctx->tmp_bi;
+  ena_tx_desc_t desc[ENA_TX_ENQ_BATCH_SZ], *d = desc;
+  const ena_tx_desc_t single = { .first = 1, .last = 1 };
+  u32 n_desc_left, n;
+
+  if (ctx->n_packets_left == 0)
+    return 0;
+
+  if (ctx->n_free_slots == 0)
+    return 0;
+
+  n_desc_left = clib_min (ENA_TX_ENQ_BATCH_SZ, ctx->n_free_slots);
+
+  while (n_desc_left >= 4 && ctx->n_packets_left >= 8)
+    {
+      clib_prefetch_load (vlib_get_buffer (vm, ctx->from[4]));
+      b0 = vlib_get_buffer (vm, ctx->from[0]);
+      clib_prefetch_load (vlib_get_buffer (vm, ctx->from[5]));
+      b1 = vlib_get_buffer (vm, ctx->from[1]);
+      clib_prefetch_load (vlib_get_buffer (vm, ctx->from[6]));
+      b2 = vlib_get_buffer (vm, ctx->from[2]);
+      clib_prefetch_load (vlib_get_buffer (vm, ctx->from[7]));
+      b3 = vlib_get_buffer (vm, ctx->from[3]);
+
+      if (PREDICT_TRUE (((b0->flags | b1->flags | b2->flags | b3->flags) &
+                        VLIB_BUFFER_NEXT_PRESENT) == 0))
+       {
+         ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d++, 1, single);
+         ctx->n_bytes += ena_txq_wr_sqe (vm, b1, use_iova, d++, 1, single);
+         ctx->n_bytes += ena_txq_wr_sqe (vm, b2, use_iova, d++, 1, single);
+         ctx->n_bytes += ena_txq_wr_sqe (vm, b3, use_iova, d++, 1, single);
+         vlib_buffer_copy_indices (f, ctx->from, 4);
+         ctx->from += 4;
+         ctx->n_packets_left -= 4;
+
+         n_desc_left -= 4;
+         f += 4;
+       }
+      else
+       {
+         n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+         if (n == 0)
+           break;
+         n_desc_left -= n;
+         f += n;
+         d += n;
+       }
+    }
+
+  while (n_desc_left > 0 && ctx->n_packets_left > 0)
+    {
+      vlib_buffer_t *b0;
+
+      b0 = vlib_get_buffer (vm, ctx->from[0]);
+      n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+      if (n == 0)
+       break;
+      n_desc_left -= n;
+      f += n;
+      d += n;
+    }
+
+  n = d - desc;
+
+  if (n)
+    {
+      u32 head = txq->sq_head;
+      u32 offset = head & ctx->mask;
+      u32 n_before_wrap = ctx->n_desc - offset;
+      u32 n_copy;
+
+      d = desc;
+      f = ctx->tmp_bi;
+
+      if (n_before_wrap >= n)
+       {
+         n_copy = n;
+         vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+                                   n_copy);
+         ena_txq_copy_sqes (ctx, offset, d, n_copy);
+       }
+      else
+       {
+         n_copy = n_before_wrap;
+         vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+                                   n_copy);
+         ena_txq_copy_sqes (ctx, offset, d, n_copy);
+
+         n_copy = n - n_before_wrap;
+         vlib_buffer_copy_indices (ctx->sq_buffer_indices, f + n_before_wrap,
+                                   n_copy);
+         ena_txq_copy_sqes (ctx, 0, d + n_before_wrap, n_copy);
+       }
+
+      head += n;
+      __atomic_store_n (txq->sq_db, head, __ATOMIC_RELEASE);
+      txq->sq_head = head;
+      ctx->n_free_slots -= n;
+
+      return n;
+    }
+  return 0;
+}
+
+VNET_DEV_NODE_FN (ena_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (node);
+  vnet_dev_tx_queue_t *txq = tnr->tx_queue;
+  vnet_dev_t *dev = txq->port->dev;
+  ena_device_t *ed = vnet_dev_get_data (dev);
+  ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+  u32 n_pkts = 0;
+
+  ena_tx_ctx_t ctx = { .mask = txq->size - 1,
+                      .n_desc = txq->size,
+                      .n_packets_left = frame->n_vectors,
+                      .from = vlib_frame_vector_args (frame),
+                      .sqe_templates = eq->sqe_templates,
+                      .sqes = eq->sqes,
+                      .sq_buffer_indices = eq->buffer_indices,
+                      .llq = ed->llq };
+
+  vnet_dev_tx_queue_lock_if_needed (txq);
+
+  /* try 3 times to enqueue packets by first freeing consumed buffers from
+   * the ring and then trying to enqueue as much as possible */
+  for (int i = 0; i < 3; i++)
+    {
+      /* free buffers consumed by ENA */
+      if (eq->sq_head != eq->sq_tail)
+       ena_txq_deq (vm, &ctx, eq);
+
+      /* enqueue new buffers, try until last attempt enqueues 0 packets */
+      ctx.n_free_slots = ctx.n_desc - (eq->sq_head - eq->sq_tail);
+
+      if (dev->va_dma)
+       while (ena_txq_enq (vm, &ctx, eq, /* va */ 1) > 0)
+         ;
+      else
+       while (ena_txq_enq (vm, &ctx, eq, /* va */ 0) > 0)
+         ;
+
+      if (ctx.n_packets_left == 0)
+       break;
+    }
+
+  vnet_dev_tx_queue_unlock_if_needed (txq);
+
+  if (ctx.n_dropped_chain_too_long)
+    vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_CHAIN_TOO_LONG,
+                     ctx.n_dropped_chain_too_long);
+
+  n_pkts = frame->n_vectors - ctx.n_packets_left;
+  vlib_increment_combined_counter (
+    vnet_get_main ()->interface_main.combined_sw_if_counters +
+      VNET_INTERFACE_COUNTER_TX,
+    vm->thread_index, tnr->hw_if_index, n_pkts, ctx.n_bytes);
+
+  if (ctx.n_packets_left)
+    {
+      vlib_buffer_free (vm, ctx.from, ctx.n_packets_left);
+      vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_NO_FREE_SLOTS,
+                       ctx.n_packets_left);
+    }
+
+  return n_pkts;
+}
index 4de2dea..a747ea9 100644 (file)
@@ -236,6 +236,7 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE % CLIB_CACHE_LINE_BYTES == 0,
               "VLIB_BUFFER_PRE_DATA_SIZE must be divisible by cache line size");
 
 #define VLIB_BUFFER_HDR_SIZE  (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE)
+#define VLIB_BUFFER_INVALID_INDEX 0xffffffff
 
 /** \brief Prefetch buffer metadata.
     The first 64 bytes of buffer contains most header information
index ee3d540..b832681 100644 (file)
@@ -336,7 +336,7 @@ u32x8_scatter_one (u32x8 r, int index, void *p)
 }
 
 #define u32x8_gather_u32(base, indices, scale)                                \
-  (u32x8) _mm256_i32gather_epi32 (base, (__m256i) indices, scale)
+  (u32x8) _mm256_i32gather_epi32 ((const int *) base, (__m256i) indices, scale)
 
 #ifdef __AVX512F__
 #define u32x8_scatter_u32(base, indices, v, scale)                            \