dev: new device driver infra 88/39688/20
author Damjan Marion <damarion@cisco.com>
Tue, 17 Oct 2023 16:06:26 +0000 (16:06 +0000)
committer Damjan Marion <damarion@cisco.com>
Thu, 2 Nov 2023 13:41:32 +0000 (13:41 +0000)
Type: feature
Change-Id: I20c56e0d3103624407f18365c2bc1273dea5c199
Signed-off-by: Damjan Marion <damarion@cisco.com>
35 files changed:
.clang-format
MAINTAINERS
src/vlib/linux/pci.c
src/vlib/pci/pci.h
src/vnet/CMakeLists.txt
src/vnet/dev/api.c [new file with mode: 0644]
src/vnet/dev/api.h [new file with mode: 0644]
src/vnet/dev/cli.c [new file with mode: 0644]
src/vnet/dev/config.c [new file with mode: 0644]
src/vnet/dev/counters.c [new file with mode: 0644]
src/vnet/dev/counters.h [new file with mode: 0644]
src/vnet/dev/dev.c [new file with mode: 0644]
src/vnet/dev/dev.h [new file with mode: 0644]
src/vnet/dev/dev_funcs.h [new file with mode: 0644]
src/vnet/dev/error.c [new file with mode: 0644]
src/vnet/dev/errors.h [new file with mode: 0644]
src/vnet/dev/format.c [new file with mode: 0644]
src/vnet/dev/handlers.c [new file with mode: 0644]
src/vnet/dev/log.h [new file with mode: 0644]
src/vnet/dev/mgmt.h [new file with mode: 0644]
src/vnet/dev/pci.c [new file with mode: 0644]
src/vnet/dev/pci.h [new file with mode: 0644]
src/vnet/dev/port.c [new file with mode: 0644]
src/vnet/dev/process.c [new file with mode: 0644]
src/vnet/dev/process.h [new file with mode: 0644]
src/vnet/dev/queue.c [new file with mode: 0644]
src/vnet/dev/runtime.c [new file with mode: 0644]
src/vnet/dev/types.h [new file with mode: 0644]
src/vnet/ethernet/p2p_ethernet.c
src/vnet/handoff.c
src/vnet/interface_funcs.h
src/vnet/l2/l2_patch.c
src/vnet/span/span.c
src/vppinfra/types.h
src/vppinfra/vec_bootstrap.h

index 0356841..62791fa 100644 (file)
@@ -21,15 +21,22 @@ ForEachMacros:
   - 'vec_foreach_pointer'
   - 'vlib_foreach_rx_tx'
   - 'foreach_int'
+  - 'foreach_pointer'
   - 'foreach_vlib_main'
   - 'foreach_set_bit_index'
   - 'foreach_vlib_frame_bitmap_set_bit_index'
   - 'FOREACH_ARRAY_ELT'
   - 'RTE_ETH_FOREACH_DEV'
+  - 'foreach_vnet_dev_rx_queue_runtime'
+  - 'foreach_vnet_dev_counter'
+  - 'foreach_vnet_dev_port_rx_queue'
+  - 'foreach_vnet_dev_port_tx_queue'
+  - 'foreach_vnet_dev_port'
 
 StatementMacros:
   - 'CLIB_MULTIARCH_FN'
   - 'VLIB_NODE_FN'
+  - 'VNET_DEV_NODE_FN'
   - 'VNET_DEVICE_CLASS_TX_FN'
   - '__clib_section'
   - '__clib_aligned'
index a67d753..fc7be1a 100644 (file)
@@ -98,6 +98,11 @@ I:   policer
 M:     Neale Ranns <neale@graphiant.com>
 F:     src/vnet/policer/
 
+VNET New Device Drivers Infra
+I:     dev
+M:     Damjan Marion <damarion@cisco.com>
+F:     src/vnet/dev/
+
 VNET Device Drivers
 I:     devices
 Y:     src/vnet/devices/pipe/FEATURE.yaml
index 1a70c56..69d26fd 100644 (file)
@@ -885,6 +885,27 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
   return 0;
 }
 
+/* Remove the INTx handler of PCI device 'h'.
+ * Does nothing when no handler is installed; always returns 0. */
+clib_error_t *
+vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+  linux_pci_device_t *pd = linux_pci_get_device (h);
+  linux_pci_irq_t *intx = &pd->intx_irq;
+
+  if (intx->intx_handler != 0)
+    {
+      /* drop the clib file entry before touching the descriptor */
+      clib_file_del_by_index (&file_main, intx->clib_file_index);
+
+      /* the descriptor is closed here only for VFIO-type devices */
+      if (pd->type == LINUX_PCI_DEVICE_TYPE_VFIO)
+        {
+          close (intx->fd);
+          intx->fd = -1;
+        }
+
+      intx->intx_handler = 0;
+    }
+
+  return 0;
+}
+
 clib_error_t *
 vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
                                u32 start, u32 count,
@@ -942,6 +963,33 @@ error:
   return err;
 }
 
+/* Tear down MSI-X handlers for vectors [start, start + count) of PCI
+ * device 'h'. Returns an error when the device type is not VFIO,
+ * otherwise 0. */
+clib_error_t *
+vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+                                  u32 start, u32 count)
+{
+  linux_pci_device_t *pd = linux_pci_get_device (h);
+  u32 idx, end = start + count;
+
+  if (pd->type != LINUX_PCI_DEVICE_TYPE_VFIO)
+    return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
+                                 "support");
+
+  for (idx = start; idx < end; idx++)
+    {
+      linux_pci_irq_t *irq = vec_elt_at_index (pd->msix_irqs, idx);
+
+      /* fd == -1 marks a vector with nothing to release */
+      if (irq->fd == -1)
+        continue;
+
+      clib_file_del_by_index (&file_main, irq->clib_file_index);
+      close (irq->fd);
+      irq->fd = -1;
+    }
+
+  return 0;
+}
+
 clib_error_t *
 vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
                          u16 start, u16 count)
index 627833d..06a2a17 100644 (file)
@@ -240,11 +240,16 @@ clib_error_t *vlib_pci_register_intx_handler (vlib_main_t * vm,
                                              vlib_pci_dev_handle_t h,
                                              pci_intx_handler_function_t *
                                              intx_handler);
+/* Counterpart of vlib_pci_register_intx_handler: removes the INTx handler
+ * of device 'h'. */
+clib_error_t *vlib_pci_unregister_intx_handler (vlib_main_t *vm,
+                                               vlib_pci_dev_handle_t h);
 clib_error_t *vlib_pci_register_msix_handler (vlib_main_t * vm,
                                              vlib_pci_dev_handle_t h,
                                              u32 start, u32 count,
                                              pci_msix_handler_function_t *
                                              msix_handler);
+/* Counterpart of vlib_pci_register_msix_handler: removes the handlers of
+ * 'count' MSI-X vectors of device 'h', beginning at vector 'start'. */
+clib_error_t *vlib_pci_unregister_msix_handler (vlib_main_t *vm,
+                                               vlib_pci_dev_handle_t h,
+                                               u32 start, u32 count);
 clib_error_t *vlib_pci_enable_msix_irq (vlib_main_t * vm,
                                        vlib_pci_dev_handle_t h, u16 start,
                                        u16 count);
index 5e913df..5236d7e 100644 (file)
@@ -26,6 +26,19 @@ list(APPEND VNET_SOURCES
   config.c
   devices/devices.c
   devices/netlink.c
+  dev/api.c
+  dev/cli.c
+  dev/config.c
+  dev/counters.c
+  dev/dev.c
+  dev/error.c
+  dev/format.c
+  dev/handlers.c
+  dev/pci.c
+  dev/port.c
+  dev/process.c
+  dev/queue.c
+  dev/runtime.c
   error.c
   flow/flow.c
   flow/flow_cli.c
@@ -59,6 +72,7 @@ list(APPEND VNET_HEADERS
   config.h
   devices/devices.h
   devices/netlink.h
+  dev/dev.h
   flow/flow.h
   global_funcs.h
   interface/rx_queue_funcs.h
diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c
new file mode 100644 (file)
index 0000000..4d556c7
--- /dev/null
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/api.h>
+
+/* File-local log class "dev", subclass "api"; presumably the one the
+ * log_debug()/log_err() macros from log.h write to - confirm in log.h. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "api",
+};
+
+/* Check a requested queue size against queue config 'c'.
+ * Returns 1 when 'size' lies within [min_size, max_size], is a power of two
+ * if the config requires it, and is a multiple of c.multiplier when that is
+ * non-zero; returns 0 otherwise. */
+static int
+_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c)
+{
+  int in_range = size >= c.min_size && size <= c.max_size;
+  /* exactly one bit set <=> power of two */
+  int pow2_ok = !c.size_is_power_of_two || count_set_bits (size) == 1;
+  /* zero multiplier means "no constraint" (and avoids modulo by zero) */
+  int multiple_ok = !c.multiplier || (size % c.multiplier) == 0;
+
+  return in_range && pow2_ok && multiple_ok;
+}
+
+/* Attach the device named by args->device_id.
+ *
+ * Looks up the bus and bus-specific device info, walks the registered
+ * drivers (restricted to args->driver_name when it is non-empty) until a
+ * probe op returns a description, allocates the device and invokes
+ * vnet_dev_init through vnet_dev_process_call_op.
+ *
+ * Returns VNET_DEV_OK on success or a VNET_DEV_ERR_* code. When a device
+ * was allocated but a later step failed, vnet_dev_free is invoked on it. */
+vnet_dev_rv_t
+vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = 0;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  vnet_dev_bus_t *bus;
+  vnet_dev_driver_t *driver;
+  void *bus_dev_info = 0;
+  u8 *dev_desc = 0;
+
+  log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id,
+             args->driver_name, format_vnet_dev_flags, &args->flags,
+             args->args);
+
+  if (vnet_dev_by_id (args->device_id))
+    return VNET_DEV_ERR_ALREADY_IN_USE;
+
+  bus = vnet_dev_find_device_bus (vm, args->device_id);
+  if (!bus)
+    {
+      log_err (dev, "unknown bus");
+      rv = VNET_DEV_ERR_INVALID_BUS;
+      goto done;
+    }
+
+  bus_dev_info = vnet_dev_get_device_info (vm, args->device_id);
+  if (!bus_dev_info)
+    {
+      log_err (dev, "invalid or unsupported device id");
+      rv = VNET_DEV_ERR_INVALID_DEVICE_ID;
+      goto done;
+    }
+
+  /* first driver whose probe returns a description wins; 'driver' is only
+   * meaningful afterwards when dev_desc is non-zero */
+  vec_foreach (driver, dm->drivers)
+    {
+      if (args->driver_name[0] &&
+          strcmp (args->driver_name, driver->registration->name))
+        continue;
+      if (driver->ops.probe &&
+          (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info)))
+        break;
+    }
+
+  if (!dev_desc)
+    {
+      log_err (dev, "driver not available for %s", args->device_id);
+      rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE;
+      goto done;
+    }
+
+  dev = vnet_dev_alloc (vm, args->device_id, driver);
+  if (!dev)
+    {
+      log_err (dev, "dev alloc failed for %s", args->device_id);
+      /* the probe description has no owner yet, release it here */
+      vec_free (dev_desc);
+      rv = VNET_DEV_ERR_BUG;
+      goto done;
+    }
+  dev->description = dev_desc;
+
+  if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0)
+    dev->poll_stats = 1;
+
+  log_debug (0, "found '%v'", dev->description);
+
+  rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init);
+
+done:
+  if (bus_dev_info)
+    bus->ops.free_device_info (vm, bus_dev_info);
+
+  if (rv != VNET_DEV_OK && dev)
+    vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free);
+
+  return rv;
+}
+
+/* Detach the device identified by args->device_id.
+ * Returns VNET_DEV_ERR_NOT_FOUND when no such device is known, otherwise
+ * whatever the vnet_dev_detach op call returns. */
+vnet_dev_rv_t
+vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args)
+{
+  vnet_dev_t *dev;
+
+  dev = vnet_dev_by_id (args->device_id);
+  log_debug (dev, "detach");
+
+  if (dev == 0)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach);
+}
+
+/* Reset the device identified by args->device_id.
+ * Returns VNET_DEV_ERR_NOT_FOUND when the device is unknown,
+ * VNET_DEV_ERR_NOT_SUPPORTED when its driver provides no reset op,
+ * otherwise the result of the vnet_dev_reset op call. */
+vnet_dev_rv_t
+vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args)
+{
+  vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+  log_debug (dev, "reset");
+
+  if (!dev)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  /* reset is unsupported only when the driver did NOT register the op */
+  if (dev->ops.reset == 0)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  return vnet_dev_process_call_op (vm, dev, vnet_dev_reset);
+}
+
+/* Create an interface on port args->port_id of device args->device_id.
+ *
+ * Queue counts and sizes given as 0 select defaults: one RX queue, one TX
+ * queue per thread (each clamped to the port maximum) and the default sizes
+ * from the port queue configs. Non-zero values are validated against the
+ * port attributes and queue configs.
+ *
+ * Returns VNET_DEV_OK or a VNET_DEV_ERR_* code describing the rejected
+ * argument; the final result is that of the vnet_dev_port_if_create op. */
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *vm,
+                             vnet_dev_api_create_port_if_args_t *args)
+{
+  vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+  vnet_dev_port_t *port = 0;
+  u16 n_threads = vlib_get_n_threads ();
+
+  log_debug (dev,
+             "create_port_if: device '%s' port %u intf_name '%s' num_rx_q %u "
+             "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'",
+             args->device_id, args->port_id, args->intf_name,
+             args->num_rx_queues, args->num_tx_queues, args->rx_queue_size,
+             args->tx_queue_size, format_vnet_dev_port_flags, &args->flags,
+             args->args);
+
+  if (dev == 0)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  foreach_vnet_dev_port (p, dev)
+    if (p->port_id == args->port_id)
+      {
+        port = p;
+        break;
+      }
+
+  if (!port)
+    return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+  if (port->interface_created)
+    return VNET_DEV_ERR_ALREADY_EXISTS;
+
+  if (args->num_rx_queues)
+    {
+      if (args->num_rx_queues > port->attr.max_rx_queues)
+        return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES;
+      port->intf.num_rx_queues = args->num_rx_queues;
+    }
+  else
+    /* default RX count is bounded by the RX (not TX) queue limit */
+    port->intf.num_rx_queues = clib_min (port->attr.max_rx_queues, 1);
+
+  if (args->num_tx_queues)
+    {
+      if (args->num_tx_queues > port->attr.max_tx_queues)
+        return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES;
+      port->intf.num_tx_queues = args->num_tx_queues;
+    }
+  else
+    port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads);
+
+  if (args->rx_queue_size)
+    {
+      if (!_vnet_dev_queue_size_validate (args->rx_queue_size,
+                                          port->rx_queue_config))
+        return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE;
+      port->intf.rxq_sz = args->rx_queue_size;
+    }
+  else
+    port->intf.rxq_sz = port->rx_queue_config.default_size;
+
+  if (args->tx_queue_size)
+    {
+      if (!_vnet_dev_queue_size_validate (args->tx_queue_size,
+                                          port->tx_queue_config))
+        return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE;
+      port->intf.txq_sz = args->tx_queue_size;
+    }
+  else
+    port->intf.txq_sz = port->tx_queue_config.default_size;
+
+  clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name));
+
+  return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create);
+}
+
+/* Remove the interface args->sw_if_index created on a device port.
+ * Returns VNET_DEV_ERR_UNKNOWN_INTERFACE when the index does not resolve to
+ * a sw/hw interface, its dev_instance is a free slot in the port pool, or
+ * the port found there refers to a different hw interface; otherwise the
+ * result of the vnet_dev_port_if_remove op. */
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *vm,
+                             vnet_dev_api_remove_port_if_args_t *args)
+{
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *swif;
+  vnet_hw_interface_t *hwif;
+  vnet_dev_port_t *port;
+
+  swif = vnet_get_sw_interface_or_null (vnm, args->sw_if_index);
+  if (swif == 0)
+    return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+  hwif = vnet_get_hw_interface_or_null (vnm, swif->hw_if_index);
+  if (hwif == 0 ||
+      pool_is_free_index (vdm->ports_by_dev_instance, hwif->dev_instance))
+    return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+  port = vnet_dev_get_port_from_dev_instance (hwif->dev_instance);
+
+  /* the port must point back at the very same hw interface */
+  if (port->intf.hw_if_index == swif->hw_if_index)
+    return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove);
+
+  return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+}
diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h
new file mode 100644 (file)
index 0000000..69a8462
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_API_H_
+#define _VNET_DEV_API_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+/* Arguments of vnet_dev_api_attach (). */
+typedef struct
+{
+  vnet_dev_device_id_t device_id;
+  /* when empty, every registered driver is given a chance to probe */
+  vnet_dev_driver_name_t driver_name;
+  vnet_dev_flags_t flags;
+  /* logged with %v by the attach code, i.e. treated as a u8 vector */
+  u8 *args;
+} vnet_dev_api_attach_args_t;
+
+vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *,
+                                  vnet_dev_api_attach_args_t *);
+
+/* Arguments of vnet_dev_api_detach (): id of the device to detach. */
+typedef struct
+{
+  vnet_dev_device_id_t device_id;
+} vnet_dev_api_detach_args_t;
+vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *,
+                                  vnet_dev_api_detach_args_t *);
+
+/* Arguments of vnet_dev_api_reset (): id of the device to reset. */
+typedef struct
+{
+  vnet_dev_device_id_t device_id;
+} vnet_dev_api_reset_args_t;
+vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *);
+
+/* Arguments of vnet_dev_api_create_port_if ().
+ * For the four queue count/size fields 0 means "use the default". */
+typedef struct
+{
+  vnet_dev_device_id_t device_id;
+  vnet_dev_if_name_t intf_name;
+  u16 num_rx_queues;
+  u16 num_tx_queues;
+  u16 rx_queue_size;
+  u16 tx_queue_size;
+  vnet_dev_port_id_t port_id;
+  vnet_dev_port_flags_t flags;
+  /* logged with %v by the create code, i.e. treated as a u8 vector */
+  u8 *args;
+} vnet_dev_api_create_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *,
+                            vnet_dev_api_create_port_if_args_t *);
+
+/* Arguments of vnet_dev_api_remove_port_if (): software interface index of
+ * the interface to remove. */
+typedef struct
+{
+  u32 sw_if_index;
+} vnet_dev_api_remove_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *,
+                            vnet_dev_api_remove_port_if_args_t *);
+
+#endif /* _VNET_DEV_API_H_ */
diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c
new file mode 100644 (file)
index 0000000..d478f1d
--- /dev/null
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/api.h>
+
+/* CLI handler for "device attach".
+ * Parses the mandatory device id followed by optional "driver", "flags"
+ * and "args" clauses (each accepted once) and calls vnet_dev_api_attach.
+ * The parsed args vector is released on every exit path. */
+static clib_error_t *
+device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                      vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_attach_args_t a = {};
+  vnet_dev_rv_t rv;
+
+  if (!unformat_user (input, unformat_c_string_array, a.device_id,
+                      sizeof (a.device_id)))
+    return clib_error_return (0, "please specify valid device id");
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!a.driver_name[0] &&
+          unformat (input, "driver %U", unformat_c_string_array, a.driver_name,
+                    sizeof (a.driver_name)))
+        ;
+      else if (!a.flags.n &&
+               unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags))
+        ;
+      else if (!a.args && unformat (input, "args %v", &a.args))
+        ;
+      else
+        {
+          clib_error_t *err = clib_error_return (
+            0, "unknown input `%U'", format_unformat_error, input);
+          /* "args" may already have been parsed, don't leak the vector */
+          vec_free (a.args);
+          return err;
+        }
+    }
+
+  rv = vnet_dev_api_attach (vm, &a);
+
+  vec_free (a.args);
+
+  if (rv != VNET_DEV_OK)
+    return clib_error_return (0, "unable to attach '%s': %U", a.device_id,
+                              format_vnet_dev_rv, rv);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (device_attach_cmd, static) = {
+  .path = "device attach",
+  .short_help = "device attach <device-id> [driver <name>] "
+                "[args <dev-args>]",
+  .function = device_attach_cmd_fn,
+};
+
+/* CLI handler for "device detach <device-id>": forwards the parsed id to
+ * vnet_dev_api_detach and turns a failure code into a CLI error. */
+static clib_error_t *
+device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                      vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_detach_args_t detach_args = {};
+  vnet_dev_rv_t rv;
+
+  if (unformat_user (input, unformat_c_string_array, detach_args.device_id,
+                     sizeof (detach_args.device_id)) == 0)
+    return clib_error_return (0, "please specify valid device id");
+
+  rv = vnet_dev_api_detach (vm, &detach_args);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to detach '%s': %U",
+                            detach_args.device_id, format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_detach_cmd, static) = {
+  .path = "device detach",
+  .short_help = "device detach <device-id>",
+  .function = device_detach_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device reset <device-id>": forwards the parsed id to
+ * vnet_dev_api_reset and turns a failure code into a CLI error. */
+static clib_error_t *
+device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                     vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_reset_args_t reset_args = {};
+  vnet_dev_rv_t rv;
+
+  if (unformat_user (input, unformat_c_string_array, reset_args.device_id,
+                     sizeof (reset_args.device_id)) == 0)
+    return clib_error_return (0, "please specify valid device id");
+
+  rv = vnet_dev_api_reset (vm, &reset_args);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to reset '%s': %U",
+                            reset_args.device_id, format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_reset_cmd, static) = {
+  .path = "device reset",
+  .short_help = "device reset <device-id>",
+  .function = device_reset_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device create-interface".
+ * Parses the mandatory device id followed by optional clauses (interface
+ * name, port, flags, queue counts/sizes, args; each accepted only while its
+ * field is still zero/empty) and calls vnet_dev_api_create_port_if.
+ * The parsed args vector is released on every exit path. */
+static clib_error_t *
+device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_create_port_if_args_t a = {};
+  vnet_dev_rv_t rv;
+  u32 n;
+
+  if (!unformat_user (input, unformat_c_string_array, a.device_id,
+                      sizeof (a.device_id)))
+    return clib_error_return (0, "please specify valid device id");
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!a.intf_name[0] &&
+          unformat (input, "if-name %U", unformat_c_string_array, a.intf_name,
+                    sizeof (a.intf_name)))
+        ;
+      else if (!a.port_id && unformat (input, "port %u", &n))
+        a.port_id = n;
+      else if (!a.flags.n && unformat (input, "flags %U",
+                                       unformat_vnet_dev_port_flags, &a.flags))
+        ;
+      else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n))
+        a.num_rx_queues = n;
+      else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n))
+        a.num_tx_queues = n;
+      else if (!a.rx_queue_size && unformat (input, "rx-queues-size %u", &n))
+        a.rx_queue_size = n;
+      else if (!a.tx_queue_size && unformat (input, "tx-queues-size %u", &n))
+        a.tx_queue_size = n;
+      else if (!a.intf_name[0] &&
+               unformat (input, "name %U", unformat_c_string_array,
+                         a.intf_name, sizeof (a.intf_name)))
+        ;
+      else if (!a.args && unformat (input, "args %v", &a.args))
+        ;
+      else
+        {
+          clib_error_t *err = clib_error_return (
+            0, "unknown input `%U'", format_unformat_error, input);
+          /* "args" may already have been parsed, don't leak the vector */
+          vec_free (a.args);
+          return err;
+        }
+    }
+
+  rv = vnet_dev_api_create_port_if (vm, &a);
+
+  vec_free (a.args);
+
+  if (rv != VNET_DEV_OK)
+    return clib_error_return (0, "unable to create_if '%s': %U", a.device_id,
+                              format_vnet_dev_rv, rv);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (device_create_if_cmd, static) = {
+  .path = "device create-interface",
+  .short_help = "device create-interface <device-id> [port <port-id>] "
+                "[args <iface-args>]",
+  .function = device_create_if_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device remove-interface": accepts an interface name or
+ * "sw-if-index <n>" and hands the index to vnet_dev_api_remove_port_if. */
+static clib_error_t *
+device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_dev_api_remove_port_if_args_t rm_args = { .sw_if_index = ~0 };
+  vnet_dev_rv_t rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+                    &rm_args.sw_if_index))
+        continue;
+      if (unformat (input, "sw-if-index %u", &rm_args.sw_if_index))
+        continue;
+
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  /* still at the ~0 initializer: nothing usable was given */
+  if (rm_args.sw_if_index == ~0)
+    return clib_error_return (0, "please specify existing interface name");
+
+  rv = vnet_dev_api_remove_port_if (vm, &rm_args);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to remove interface: %U",
+                            format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_remove_if_cmd, static) = {
+  .path = "device remove-interface",
+  .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]",
+  .function = device_remove_if_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show device".
+ * Optional keywords "counters", "all" and "debug" set the matching fields
+ * of the format args. Prints info for every device in the pool, each of
+ * its ports (plus port counters when requested) and each RX/TX queue. */
+static clib_error_t *
+show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                     vlib_cli_command_t *cmd)
+{
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_dev_format_args_t fa = {};
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "counters"))
+        {
+          fa.counters = 1;
+          continue;
+        }
+      if (unformat (input, "all"))
+        {
+          fa.show_zero_counters = 1;
+          continue;
+        }
+      if (unformat (input, "debug"))
+        {
+          fa.debug = 1;
+          continue;
+        }
+
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  pool_foreach_pointer (dev, vdm->devices)
+    {
+      vlib_cli_output (vm, "device '%s':", dev->device_id);
+      vlib_cli_output (vm, "  %U", format_vnet_dev_info, &fa, dev);
+
+      foreach_vnet_dev_port (p, dev)
+        {
+          vlib_cli_output (vm, "  Port %u:", p->port_id);
+          vlib_cli_output (vm, "    %U", format_vnet_dev_port_info, &fa, p);
+
+          if (fa.counters)
+            vlib_cli_output (vm, "    %U", format_vnet_dev_counters, &fa,
+                             p->counter_main);
+
+          foreach_vnet_dev_port_rx_queue (q, p)
+            {
+              vlib_cli_output (vm, "    RX queue %u:", q->queue_id);
+              vlib_cli_output (vm, "      %U", format_vnet_dev_rx_queue_info,
+                               &fa, q);
+            }
+
+          foreach_vnet_dev_port_tx_queue (q, p)
+            {
+              vlib_cli_output (vm, "    TX queue %u:", q->queue_id);
+              vlib_cli_output (vm, "      %U", format_vnet_dev_tx_queue_info,
+                               &fa, q);
+            }
+        }
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_devices_cmd, static) = {
+  .path = "show device",
+  .short_help = "show device [counters]",
+  .function = show_devices_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show device counters [all]".
+ * Prints the port counters of every device and, for queues that have a
+ * counter_main, the per-queue counters; "all" sets show_zero_counters. */
+static clib_error_t *
+show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                             vlib_cli_command_t *cmd)
+{
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_dev_format_args_t fmt_args = { .counters = 1 };
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "all"))
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+      fmt_args.show_zero_counters = 1;
+    }
+
+  pool_foreach_pointer (dev, vdm->devices)
+    {
+      vlib_cli_output (vm, "device '%s':", dev->device_id);
+
+      foreach_vnet_dev_port (p, dev)
+        {
+          vlib_cli_output (vm, "    %U", format_vnet_dev_counters, &fmt_args,
+                           p->counter_main);
+
+          foreach_vnet_dev_port_rx_queue (q, p)
+            {
+              if (q->counter_main)
+                {
+                  vlib_cli_output (vm, "  RX queue %u:", q->queue_id);
+                  vlib_cli_output (vm, "    %U", format_vnet_dev_counters,
+                                   &fmt_args, q->counter_main);
+                }
+            }
+
+          foreach_vnet_dev_port_tx_queue (q, p)
+            {
+              if (q->counter_main)
+                {
+                  vlib_cli_output (vm, "  TX queue %u:", q->queue_id);
+                  vlib_cli_output (vm, "    %U", format_vnet_dev_counters,
+                                   &fmt_args, q->counter_main);
+                }
+            }
+        }
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_device_counters_cmd, static) = {
+  .path = "show device counters",
+  .short_help = "show device counters [all]",
+  .function = show_device_counters_cmd_fn,
+  .is_mp_safe = 1,
+};
diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c
new file mode 100644 (file)
index 0000000..c98524c
--- /dev/null
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev", subclass "config"; presumably the one the
+ * log_debug()/log_err() macros from log.h write to - confirm in log.h. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "config",
+};
+
+/* Parse the body of one "port <n> { ... }" startup-config section into
+ * 'args' (name, queue counts/sizes, flags).
+ * Returns 0 on success or an error for the first unrecognized token. */
+static clib_error_t *
+vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input,
+                               vnet_dev_api_create_port_if_args_t *args)
+{
+  u32 val;
+
+  log_debug (0, "port %u %U", args->port_id, format_unformat_input, input);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "name %U", unformat_c_string_array, args->intf_name,
+                    sizeof (args->intf_name)))
+        continue;
+
+      if (unformat (input, "num-rx-queues %u", &val))
+        args->num_rx_queues = val;
+      else if (unformat (input, "num-tx-queues %u", &val))
+        args->num_tx_queues = val;
+      else if (unformat (input, "rx-queue-size %u", &val))
+        args->rx_queue_size = val;
+      else if (unformat (input, "tx-queue-size %u", &val))
+        args->tx_queue_size = val;
+      else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags,
+                         &args->flags))
+        ;
+      else
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  return 0;
+}
+
+/* Handle one "dev <device-id> { ... }" startup-config section.
+ *
+ * Parses the optional "driver" and "flags" clauses and any number of
+ * "port <n> { ... }" sub-sections, then attaches the device and creates an
+ * interface for every parsed port, stopping at the first failure.
+ *
+ * Returns 0 on success, otherwise a parse error or an error describing the
+ * failing attach / create-interface return code. */
+static clib_error_t *
+vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input,
+                            char *device_id)
+{
+  log_debug (0, "device %s %U", device_id, format_unformat_input, input);
+  clib_error_t *err = 0;
+  vnet_dev_api_attach_args_t args = {};
+  vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args;
+  vnet_dev_rv_t rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat_input_t sub_input;
+      u32 n;
+
+      if (unformat (input, "driver %U", unformat_c_string_array,
+                    args.driver_name, sizeof (args.driver_name)))
+        ;
+      else if (unformat (input, "flags %U", unformat_vnet_dev_flags,
+                         &args.flags))
+        ;
+      else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input,
+                         &sub_input))
+        {
+          vec_add2 (if_args_vec, if_args, 1);
+          if_args->port_id = n;
+          err = vnet_dev_config_one_interface (vm, &sub_input, if_args);
+          unformat_free (&sub_input);
+          if (err)
+            break;
+        }
+      else
+        {
+          err = clib_error_return (0, "unknown input '%U'",
+                                   format_unformat_error, input);
+          break;
+        }
+    }
+
+  if (err)
+    goto done;
+
+  clib_memcpy (args.device_id, device_id, sizeof (args.device_id));
+  rv = vnet_dev_api_attach (vm, &args);
+
+  if (rv == VNET_DEV_OK)
+    {
+      vec_foreach (if_args, if_args_vec)
+        {
+          clib_memcpy (if_args->device_id, device_id,
+                       sizeof (if_args->device_id));
+          rv = vnet_dev_api_create_port_if (vm, if_args);
+          if (rv != VNET_DEV_OK)
+            break;
+        }
+    }
+
+  /* covers both a failed attach and a failed interface creation */
+  if (rv != VNET_DEV_OK)
+    err = clib_error_return (0, "error: %U for device '%s'",
+                             format_vnet_dev_rv, rv, device_id);
+
+done:
+  vec_free (if_args_vec);
+  return err;
+}
+
+/* Process node function that applies the "devices" startup configuration.
+ *
+ * Takes the text collected in dm->startup_config, handles every
+ * "dev <device-id> { ... }" section and stops at the first error, which is
+ * logged and then freed. Afterwards the node disables and renames itself
+ * and adds its index to dm->free_process_node_indices. Always returns 0. */
+uword
+dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
+                            vlib_frame_t *f)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  unformat_input_t input;
+  clib_error_t *err = 0;
+
+  if (dm->startup_config == 0)
+    return 0;
+
+  /* the vector is handed over to 'input' and released by unformat_free */
+  unformat_init_vector (&input, dm->startup_config);
+  dm->startup_config = 0;
+
+  while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat_input_t sub_input;
+      vnet_dev_device_id_t device_id;
+      if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id,
+                    sizeof (device_id), unformat_vlib_cli_sub_input,
+                    &sub_input))
+        {
+          err = vnet_dev_config_one_device (vm, &sub_input, device_id);
+          unformat_free (&sub_input);
+          if (err)
+            break;
+        }
+      else
+        {
+          /* format_unformat_error takes a pointer to the input */
+          err = clib_error_return (0, "unknown input '%U'",
+                                   format_unformat_error, &input);
+          break;
+        }
+    }
+
+  unformat_free (&input);
+
+  if (err)
+    {
+      /* nobody consumes a return error here: report it and release it */
+      log_err (0, "%U", format_clib_error, err);
+      clib_error_free (err);
+    }
+
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+  vec_add1 (dm->free_process_node_indices, rt->node_index);
+  return 0;
+}
+
+/* Registers dev_config_process_node_fn as process-type node "dev-config". */
+VLIB_REGISTER_NODE (dev_config_process_node) = {
+  .function = dev_config_process_node_fn,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "dev-config",
+};
+
+/* Config handler for the "devices" startup section: copies the raw section
+ * text character by character into dm->startup_config. Always returns 0. */
+static clib_error_t *
+devices_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword ch;
+
+  for (ch = unformat_get_input (input); ch != UNFORMAT_END_OF_INPUT;
+       ch = unformat_get_input (input))
+    vec_add1 (dm->startup_config, ch);
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (devices_config, "devices");
diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c
new file mode 100644 (file)
index 0000000..0a1e0a7
--- /dev/null
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = { /* log class of this file */
+  .class_name = "dev",
+  .subclass_name = "counters",
+};
+
+/* Allocate a counter set holding a copy of the given counter descriptors.
+ *
+ * counters, n_counters: descriptors to copy; the 'index' of every copy is
+ *   set to its position inside the set.
+ * fmt, ...: optional format string and arguments used to build the
+ *   description of the set; a NULL or empty fmt leaves it unset.
+ *
+ * Returns the new set, to be released with vnet_dev_counters_free (). */
+vnet_dev_counter_main_t *
+vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters,
+                        u16 n_counters, char *fmt, ...)
+{
+  vnet_dev_counter_main_t *cm;
+  u32 alloc_sz;
+
+  alloc_sz = sizeof (*cm) + n_counters * sizeof (cm->counters[0]);
+  cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);
+  /* only the header is cleared, the descriptors are copied in below */
+  clib_memset (cm, 0, sizeof (*cm));
+  cm->n_counters = n_counters;
+
+  if (fmt && fmt[0])
+    {
+      va_list va;
+      va_start (va, fmt);
+      cm->desc = va_format (0, fmt, &va);
+      va_end (va);
+    }
+
+  /* an empty set needs no value vectors; 'n_counters - 1' would be -1 and
+   * must not be used as the highest index to validate */
+  if (n_counters == 0)
+    return cm;
+
+  for (u32 i = 0; i < n_counters; i++)
+    {
+      cm->counters[i] = counters[i];
+      cm->counters[i].index = i;
+    }
+
+  vec_validate_aligned (cm->counter_data, n_counters - 1,
+                       CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (cm->counter_start, n_counters - 1,
+                       CLIB_CACHE_LINE_BYTES);
+
+  return cm;
+}
+
+/* Reset every counter of the set to zero.
+ *
+ * vnet_dev_counter_value_update () stores 'val - counter_start', so the
+ * value being cleared has to be added to the baseline rather than replace
+ * it; otherwise a second clear would set the baseline to a delta instead
+ * of the absolute value seen so far. */
+void
+vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  for (int i = 0; i < cm->n_counters; i++)
+    {
+      cm->counter_start[i] += cm->counter_data[i];
+      cm->counter_data[i] = 0;
+    }
+}
+
+/* Release a counter set created by vnet_dev_counters_alloc (): both value
+ * vectors, the description and finally the set itself. */
+void
+vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  vec_free (cm->counter_start);
+  vec_free (cm->counter_data);
+  vec_free (cm->desc);
+
+  clib_mem_free (cm);
+}
+
+/* Format function printing the human readable name of a counter.
+ *
+ * Expects one argument: vnet_dev_counter_t *.  Standard counter types are
+ * printed from a fixed table; vendor counters print their own name followed
+ * by the unit and the direction when those have a table entry. */
+u8 *
+format_vnet_dev_counter_name (u8 *s, va_list *va)
+{
+  vnet_dev_counter_t *ctr = va_arg (*va, vnet_dev_counter_t *);
+
+  char *unit_names[] = {
+    [VNET_DEV_CTR_UNIT_BYTES] = "bytes",
+    [VNET_DEV_CTR_UNIT_PACKETS] = "packets",
+  };
+  char *dir_names[] = {
+    [VNET_DEV_CTR_DIR_RX] = "received",
+    [VNET_DEV_CTR_DIR_TX] = "sent",
+  };
+  char *std_names[] = {
+    [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received",
+    [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted",
+    [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received",
+    [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted",
+    [VNET_DEV_CTR_TYPE_RX_DROPS] = "total drops received",
+    [VNET_DEV_CTR_TYPE_TX_DROPS] = "total drops transmitted",
+  };
+
+  if (ctr->type != VNET_DEV_CTR_TYPE_VENDOR)
+    {
+      if (ctr->type < ARRAY_LEN (std_names) && std_names[ctr->type])
+       return format (s, "%s", std_names[ctr->type]);
+
+      /* neither a vendor counter nor a known standard type */
+      ASSERT (0);
+      return s;
+    }
+
+  s = format (s, "%s", ctr->name);
+
+  if (ctr->unit < ARRAY_LEN (unit_names) && unit_names[ctr->unit])
+    s = format (s, " %s", unit_names[ctr->unit]);
+
+  if (ctr->dir < ARRAY_LEN (dir_names) && dir_names[ctr->dir])
+    s = format (s, " %s", dir_names[ctr->dir]);
+
+  return s;
+}
+
+/* Format function printing all counters of a set, one per line.
+ *
+ * Expects two arguments: vnet_dev_format_args_t * and
+ * vnet_dev_counter_main_t *.  Counters with a zero value are skipped unless
+ * show_zero_counters is set.  Lines after the first are indented to the
+ * indent of 's' at entry. */
+u8 *
+format_vnet_dev_counters (u8 *s, va_list *va)
+{
+  vnet_dev_format_args_t *args = va_arg (*va, vnet_dev_format_args_t *);
+  vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *);
+  u32 indent = format_get_indent (s);
+  u32 n_printed = 0;
+
+  foreach_vnet_dev_counter (c, cm)
+    {
+      if (args->show_zero_counters == 0 && cm->counter_data[c->index] == 0)
+       continue;
+
+      if (n_printed > 0)
+       s = format (s, "\n%U", format_white_space, indent);
+      n_printed++;
+
+      s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c,
+                 cm->counter_data[c->index]);
+    }
+
+  return s;
+}
diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h
new file mode 100644 (file)
index 0000000..33d08ff
--- /dev/null
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_COUNTERS_H_
+#define _VNET_DEV_COUNTERS_H_
+
+#include <vnet/dev/dev.h>
+
+typedef enum /* direction a counter refers to */
+{
+  VNET_DEV_CTR_DIR_NA, /* no direction */
+  VNET_DEV_CTR_DIR_RX,
+  VNET_DEV_CTR_DIR_TX,
+} __clib_packed vnet_dev_counter_direction_t;
+
+typedef enum /* standard counter types, plus VENDOR for driver-named ones */
+{
+  VNET_DEV_CTR_TYPE_RX_BYTES,
+  VNET_DEV_CTR_TYPE_RX_PACKETS,
+  VNET_DEV_CTR_TYPE_RX_DROPS,
+  VNET_DEV_CTR_TYPE_TX_BYTES,
+  VNET_DEV_CTR_TYPE_TX_PACKETS,
+  VNET_DEV_CTR_TYPE_TX_DROPS,
+  VNET_DEV_CTR_TYPE_VENDOR, /* name is taken from vnet_dev_counter_t.name */
+} __clib_packed vnet_dev_counter_type_t;
+
+typedef enum /* unit of a counter value */
+{
+  VNET_DEV_CTR_UNIT_NA, /* no unit */
+  VNET_DEV_CTR_UNIT_BYTES,
+  VNET_DEV_CTR_UNIT_PACKETS,
+} __clib_packed vnet_dev_counter_unit_t;
+
+typedef struct vnet_dev_counter /* descriptor of a single counter */
+{
+  char name[24];   /* set by VNET_DEV_CTR_VENDOR; printed for vendor type */
+  uword user_data; /* value passed as 'p' to the VNET_DEV_CTR_* macros */
+  vnet_dev_counter_type_t type;
+  vnet_dev_counter_direction_t dir;
+  vnet_dev_counter_unit_t unit;
+  u16 index; /* position inside the owning set's counters[] */
+} vnet_dev_counter_t;
+
+typedef struct vnet_dev_counter_main /* a set of counters and their values */
+{
+  u8 *desc;           /* description built from the fmt given at alloc */
+  u64 *counter_data;  /* current values, indexed by counter index */
+  u64 *counter_start; /* baseline subtracted by ..._value_update () */
+  u16 n_counters;     /* number of elements in counters[] */
+  vnet_dev_counter_t counters[];
+} vnet_dev_counter_main_t;
+
+#define VNET_DEV_CTR_RX_BYTES(p, ...) /* initializer; p -> .user_data */      \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_RX_BYTES, .dir = VNET_DEV_CTR_DIR_RX,           \
+    .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__            \
+  }
+#define VNET_DEV_CTR_TX_BYTES(p, ...) /* initializer; p -> .user_data */      \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_TX_BYTES, .dir = VNET_DEV_CTR_DIR_TX,           \
+    .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__            \
+  }
+#define VNET_DEV_CTR_RX_PACKETS(p, ...) /* initializer; p -> .user_data */    \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_RX_PACKETS, .dir = VNET_DEV_CTR_DIR_RX,         \
+    .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__          \
+  }
+#define VNET_DEV_CTR_TX_PACKETS(p, ...) /* initializer; p -> .user_data */    \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_TX_PACKETS, .dir = VNET_DEV_CTR_DIR_TX,         \
+    .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__          \
+  }
+#define VNET_DEV_CTR_RX_DROPS(p, ...) /* drops are counted in packets */      \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX,           \
+    .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__          \
+  }
+#define VNET_DEV_CTR_TX_DROPS(p, ...) /* drops are counted in packets */      \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX,           \
+    .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__          \
+  }
+#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) /* d, u: DIR_/UNIT_ suffixes */  \
+  {                                                                           \
+    .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n,            \
+    .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__   \
+  }
+
+vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *,
+                                                 vnet_dev_counter_t *, u16,
+                                                 char *, ...); /* args:
+   vm, descriptors, number of descriptors, description format + args */
+void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *);
+void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *);
+
+format_function_t format_vnet_dev_counters; /* args: format args, set */
+format_function_t format_vnet_dev_counters_all; /* NOTE(review): counters.c
+   has no definition with this name - confirm it is defined elsewhere */
+
+/* Get the counter set a descriptor belongs to: step back 'index' elements to
+ * the first descriptor, then back by the offset of counters[] inside the
+ * set. */
+static_always_inline vnet_dev_counter_main_t *
+vnet_dev_counter_get_main (vnet_dev_counter_t *counter)
+{
+  vnet_dev_counter_t *first = counter - counter->index;
+  u8 *base =
+    (u8 *) first - STRUCT_OFFSET_OF (vnet_dev_counter_main_t, counters);
+
+  return (vnet_dev_counter_main_t *) base;
+}
+
+/* Add 'val' to the current value of a counter. */
+static_always_inline void
+vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                           u64 val)
+{
+  u64 *values = vnet_dev_counter_get_main (counter)->counter_data;
+
+  values[counter->index] += val;
+}
+
+/* Set a counter from an absolute value: the stored value is 'val' minus the
+ * baseline kept in counter_start. */
+static_always_inline void
+vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                              u64 val)
+{
+  vnet_dev_counter_main_t *set = vnet_dev_counter_get_main (counter);
+  u16 i = counter->index;
+
+  set->counter_data[i] = val - set->counter_start[i];
+}
+
+#define foreach_vnet_dev_counter(c, cm)                                       \
+  if (cm)                                                                     \
+    for (typeof (*(cm)->counters) *(c) = (cm)->counters;                      \
+        (c) < (cm)->counters + (cm)->n_counters; (c)++)
+
+#endif /* _VNET_DEV_COUNTERS_H_ */
diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c
new file mode 100644 (file)
index 0000000..538d144
--- /dev/null
@@ -0,0 +1,456 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/counters.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = { /* log class of this file */
+  .class_name = "dev",
+};
+
+vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 }; /* global state of the dev infra */
+
+/* Find the bus a device id belongs to.
+ *
+ * A bus matches when the id starts with the registered bus name followed by
+ * VNET_DEV_DEVICE_ID_PREFIX_DELIMITER and at least one more character.
+ *
+ * Returns the first matching bus, or 0 when there is none. */
+vnet_dev_bus_t *
+vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus;
+
+  pool_foreach (bus, dm->buses)
+    {
+      int name_len = strlen (bus->registration->name);
+      int id_len = strlen (id);
+      int delim_len = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER);
+
+      if (id_len > name_len + delim_len &&
+         strncmp (bus->registration->name, id, name_len) == 0 &&
+         strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + name_len,
+                  delim_len) == 0)
+       return bus;
+    }
+
+  return 0;
+}
+
+/* Ask the bus selected by the device id prefix for information about the
+ * device.  Returns 0 when no bus matches the id, otherwise whatever the
+ * bus' get_device_info op returns. */
+void *
+vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_bus_t *bus = vnet_dev_find_device_bus (vm, id);
+
+  return bus ? bus->ops.get_device_info (vm, id) : 0;
+}
+
+/* Create a device object for the given id, bound to 'driver'.
+ *
+ * The device takes a slot in vnet_dev_main.devices, copies the ops and bus
+ * index from the driver, is entered into device_index_by_id and gets its
+ * process created.
+ *
+ * Returns the device, or 0 (after vnet_dev_free ()) when the process could
+ * not be created. */
+vnet_dev_t *
+vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id,
+               vnet_dev_driver_t *driver)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t **slot = 0;
+  vnet_dev_t *dev;
+
+  dev = vnet_dev_alloc_with_data (sizeof (vnet_dev_t),
+                                 driver->registration->device_data_sz);
+
+  pool_get (dm->devices, slot);
+  slot[0] = dev;
+
+  dev->index = slot - dm->devices;
+  dev->driver_index = driver->index;
+  dev->ops = driver->registration->ops;
+  dev->bus_index = driver->bus_index;
+  clib_memcpy (dev->device_id, id, sizeof (dev->device_id));
+  hash_set (dm->device_index_by_id, dev->device_id, dev->index);
+
+  if (vnet_dev_process_create (vm, dev) != VNET_DEV_OK)
+    {
+      vnet_dev_free (vm, dev);
+      return 0;
+    }
+
+  return dev;
+}
+
+/* Run the driver's deinit and free ops, when provided, after a failed
+ * device bring-up. */
+static void
+dev_init_failed_cleanup (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  if (dev->ops.deinit)
+    dev->ops.deinit (vm, dev);
+  if (dev->ops.free)
+    dev->ops.free (vm, dev);
+}
+
+/* Bring a device up: open it on its bus, then run the driver's alloc (if
+ * provided) and init ops.
+ *
+ * On success the device is marked initialized and VNET_DEV_OK is returned.
+ * On failure the error of the failing step is returned; when the alloc or
+ * init op failed, the driver's deinit and free ops are run first.
+ * NOTE(review): the bus device is left open on those failure paths -
+ * confirm that callers close it. */
+vnet_dev_rv_t
+vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK)
+    return rv;
+
+  /* alloc is treated as optional, like deinit and free */
+  if (dev->ops.alloc && (rv = dev->ops.alloc (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device alloc failed [rv %d]", rv);
+      dev_init_failed_cleanup (vm, dev);
+      return rv;
+    }
+
+  if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device init failed [rv %d]", rv);
+      dev_init_failed_cleanup (vm, dev);
+      return rv;
+    }
+
+  dev->initialized = 1;
+  dev->not_first_init = 1;
+  return VNET_DEV_OK;
+}
+
+/* Shut an initialized device down: run the driver's deinit op and the bus'
+ * device_close op (each when provided), ask the device process to quit and
+ * clear the initialized flag.  No port may still have an interface. */
+void
+vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_bus_t *bus;
+
+  ASSERT (dev->initialized == 1);
+  vnet_dev_validate (vm, dev);
+
+  foreach_vnet_dev_port (port, dev)
+    {
+      ASSERT (port->interface_created == 0);
+    }
+
+  if (dev->ops.deinit != 0)
+    dev->ops.deinit (vm, dev);
+
+  bus = vnet_dev_get_bus (dev);
+  if (bus->ops.device_close != 0)
+    bus->ops.device_close (vm, dev);
+
+  vnet_dev_process_quit (vm, dev);
+
+  dev->initialized = 0;
+}
+
+void
+vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev) /* release a deinitialized
+   device: its ports, description, pools and its global bookkeeping */
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  vnet_dev_validate (vm, dev);
+
+  ASSERT (dev->initialized == 0);
+
+  foreach_vnet_dev_port (p, dev)
+    vnet_dev_port_free (vm, p); /* ports go first, their pool is freed below */
+
+  vec_free (dev->description);
+  pool_free (dev->ports);
+  pool_free (dev->periodic_ops);
+  hash_unset (dm->device_index_by_id, dev->device_id); /* undo hash_set () done in vnet_dev_alloc () */
+  pool_put_index (dm->devices, dev->index); /* NOTE(review): the memory of
+   'dev' itself is not released here - confirm it is freed elsewhere */
+}
+
+/* Reset an initialized device through the driver's reset op.
+ * Returns VNET_DEV_ERR_NOT_SUPPORTED when the driver has no such op,
+ * otherwise the result of the op (failures are logged). */
+vnet_dev_rv_t
+vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_rv_t rv;
+
+  ASSERT (dev->initialized == 1);
+  vnet_dev_validate (vm, dev);
+
+  if (!dev->ops.reset)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  rv = dev->ops.reset (vm, dev);
+  if (rv == VNET_DEV_OK)
+    return VNET_DEV_OK;
+
+  log_err (dev, "device reset failed [rv %d]", rv);
+  return rv;
+}
+
+/* Detach a device: remove the interface of every port that has one, then
+ * deinitialize and free the device. */
+void
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  foreach_vnet_dev_port (port, dev)
+    {
+      if (port->interface_created)
+       vnet_dev_port_if_remove (vm, port);
+    }
+
+  vnet_dev_deinit (vm, dev);
+  vnet_dev_free (vm, dev);
+}
+
+/* Allocate DMA memory for a device through its bus.
+ *
+ * size, align: passed to the bus' dma_mem_alloc_fn op.
+ * pp: receives the allocated address.
+ *
+ * Returns VNET_DEV_ERR_NOT_SUPPORTED when the bus has no allocator,
+ * otherwise the result of the bus op. */
+vnet_dev_rv_t
+vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align,
+                       void **pp)
+{
+  vnet_dev_bus_t *bus =
+    pool_elt_at_index (vnet_dev_main.buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if (bus->ops.dma_mem_alloc_fn == 0)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size,
+            *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node,
+            align);
+  return rv;
+}
+
+/* Free DMA memory obtained with vnet_dev_dma_mem_alloc ().
+ * Does nothing when 'p' is NULL or the bus has no dma_mem_free_fn op. */
+void
+vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+  vnet_dev_validate (vm, dev);
+
+  if (p == 0 || !bus->ops.dma_mem_free_fn)
+    return;
+
+  /* plain call: a void function must not 'return' an expression */
+  bus->ops.dma_mem_free_fn (vm, dev, p);
+}
+
+/* Interface admin up/down handler of the device class.
+ *
+ * Starts the port when the interface goes up and the port is not started,
+ * stops it when the interface goes down and the port is started; both are
+ * done through the device process.
+ *
+ * Returns 0 on success or an error describing the failed state change. */
+clib_error_t *
+vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    {
+      if (p->started == 0)
+       rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start);
+    }
+  else if (p->started)
+    rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop);
+
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "failed to change port admin state: %U",
+                           format_vnet_dev_rv, rv);
+}
+
+/* Feature arc change callback, registered with vnet_feature_register ().
+ *
+ * Only acts on the ethernet port RX feature arc and on interfaces backed by
+ * a port of this infra.  When features are enabled on the interface, the
+ * port records the config index and next index of the arc; when they are no
+ * longer enabled, the port falls back to the redirect next index or to the
+ * default next index of its port type.  If anything changed, every RX queue
+ * of the port is asked to update its runtime. */
+static void
+vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable,
+                           void *cb)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_feature_main_t *fm = &feature_main;
+  vnet_feature_config_main_t *cm;
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_dev_port_t *port;
+  vnet_hw_interface_t *hw;
+  u32 current_config_index = ~0;
+  u32 next_index = ~0;
+  int update_runtime = 0;
+
+  if (arc_index != vdm->eth_port_rx_feature_arc_index)
+    return;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+  if (port == 0 || port->intf.sw_if_index != sw_if_index)
+    return;
+
+  if (vnet_have_features (arc_index, sw_if_index))
+    {
+      cm = &fm->feature_config_mains[arc_index];
+      current_config_index =
+       vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+      vnet_get_config_data (&cm->config_main, &current_config_index,
+                           &next_index, 0);
+      /* update only if the arc was off or its parameters changed */
+      if (port->intf.feature_arc == 0 ||
+         port->intf.rx_next_index != next_index ||
+         port->intf.current_config_index != current_config_index)
+       {
+         port->intf.current_config_index = current_config_index;
+         port->intf.rx_next_index = next_index;
+         port->intf.feature_arc_index = arc_index;
+         port->intf.feature_arc = 1;
+         update_runtime = 1;
+       }
+    }
+  else
+    {
+      if (port->intf.feature_arc)
+       {
+         port->intf.current_config_index = 0;
+         port->intf.rx_next_index =
+           port->intf.redirect_to_node ?
+                   port->intf.redirect_to_node_next_index :
+                   vnet_dev_default_next_index_by_port_type[port->attr.type];
+         port->intf.feature_arc_index = 0;
+         port->intf.feature_arc = 0;
+         update_runtime = 1;
+       }
+    }
+
+  if (update_runtime)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+       vnet_dev_rx_queue_rt_request (
+         vm, rxq,
+         (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1,
+                                       .update_feature_arc = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+                           "feature arc configuration");
+    }
+}
+
+/* Comparison function for vec_sort_with_function (): orders driver
+ * registrations by descending priority. */
+static int
+sort_driver_registrations (void *a0, void *a1)
+{
+  int prio0 = (*(vnet_dev_driver_registration_t **) a0)->priority;
+  int prio1 = (*(vnet_dev_driver_registration_t **) a1)->priority;
+
+  /* -1 when the first has the higher priority, 1 when lower, 0 if equal */
+  return (prio0 < prio1) - (prio0 > prio1);
+}
+
+/* Init function of the device driver infra.
+ *
+ * - creates the device-id hash,
+ * - creates a bus object for every bus registration; the per-device data
+ *   size of a bus must be non-zero and fit into vnet_dev_t.bus_data,
+ * - creates a driver object and registers a device class for every driver
+ *   registration, in descending priority order; the driver's bus must be
+ *   registered,
+ * - allocates the per-thread runtime temp space, sized to the largest
+ *   request of any driver rounded up to a power of two,
+ * - registers the feature arc change callback.
+ *
+ * Returns 0 on success, otherwise an error. */
+static clib_error_t *
+vnet_dev_main_init (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_driver_registration_t **drv = 0;
+  u32 temp_space_sz = 0;
+
+  dm->device_index_by_id = hash_create_string (0, sizeof (uword));
+
+  for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r;
+       r = r->next_registration)
+    {
+      vnet_dev_bus_t *bus;
+      pool_get_zero (dm->buses, bus);
+      bus->registration = r;
+      bus->index = bus - dm->buses;
+      bus->ops = r->ops;
+      if (!r->device_data_size ||
+         r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data))
+       return clib_error_return (
+         0, "bus device data for bus '%s' is too big or not specified",
+         r->name);
+
+      log_debug (0, "bus '%s' registered", r->name);
+    }
+
+  for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r;
+       r = r->next_registration)
+    vec_add1 (drv, r);
+
+  vec_sort_with_function (drv, sort_driver_registrations);
+
+  vec_foreach_pointer (r, drv)
+    {
+      vnet_dev_driver_t *driver;
+      vnet_dev_bus_t *bus;
+      vnet_device_class_t *dev_class;
+      int bus_index = -1;
+
+      /* on a match 'bus' is left pointing to the bus of this driver */
+      pool_foreach (bus, dm->buses)
+       {
+         if (strcmp (bus->registration->name, r->bus) == 0)
+           {
+             bus_index = bus->index;
+             break;
+           }
+       }
+
+      if (bus_index < 0)
+       {
+         /* the message is built before the temporary vector is released */
+         clib_error_t *err =
+           clib_error_return (0, "unknown bus '%s'", r->bus);
+         vec_free (drv);
+         return err;
+       }
+
+      pool_get_zero (dm->drivers, driver);
+      driver->registration = r;
+      driver->index = driver - dm->drivers;
+      driver->bus_index = bus_index;
+      driver->ops = r->ops;
+      dev_class = clib_mem_alloc (sizeof (vnet_device_class_t));
+      *dev_class = (vnet_device_class_t){
+       .name = r->name,
+       .format_device_name = format_vnet_dev_interface_name,
+       .format_device = format_vnet_dev_interface_info,
+       .admin_up_down_function = vnet_dev_admin_up_down_fn,
+       .rx_redirect_to_node = vnet_dev_set_interface_next_node,
+       .clear_counters = vnet_dev_clear_hw_interface_counters,
+       .rx_mode_change_function = vnet_dev_rx_mode_change_fn,
+       .mac_addr_change_function = vnet_dev_port_mac_change,
+       .mac_addr_add_del_function = vnet_dev_add_del_mac_address,
+       .flow_ops_function = vnet_dev_flow_ops_fn,
+       .set_rss_queues_function = vnet_dev_interface_set_rss_queues,
+      };
+      driver->dev_class_index = vnet_register_device_class (vm, dev_class);
+      log_debug (0, "driver '%s' registered on bus '%s'", r->name,
+                bus->registration->name);
+
+      /* remember the largest temp space any driver asks for */
+      if (temp_space_sz < r->runtime_temp_space_sz)
+       temp_space_sz = r->runtime_temp_space_sz;
+    }
+
+  if (dm->startup_config)
+    log_debug (0, "startup config: %v", dm->startup_config);
+
+  vec_free (drv);
+
+  if (temp_space_sz > 0)
+    {
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      u32 sz = round_pow2 (temp_space_sz, align);
+      dm->log2_runtime_temp_space_sz =
+       get_lowest_set_bit_index (max_pow2 (sz));
+      sz = 1 << dm->log2_runtime_temp_space_sz;
+      sz *= vlib_get_n_threads ();
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      log_debug (0,
+                "requested %u bytes for runtime temp storage, allocated %u "
+                "per thread (total %u)",
+                temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz);
+    }
+
+  vnet_feature_register (vnet_dev_feature_update_cb, 0);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_dev_main_init);
+
+/* Worker count change handler: reallocates the zeroed runtime temp space so
+ * that there is one block of (1 << log2_runtime_temp_space_sz) bytes per
+ * thread.  Does nothing when no temp space size was set up.
+ * Always returns 0. */
+clib_error_t *
+vnet_dev_num_workers_change (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  if (dm->log2_runtime_temp_space_sz > 0)
+    {
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      uword sz =
+       (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads ();
+      if (dm->runtime_temp_spaces)
+       clib_mem_free (dm->runtime_temp_spaces);
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      /* sz is a uword, so it needs the 'l' length modifier */
+      log_debug (0, "runtime temp storage resized to %lu", sz);
+    }
+
+  return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change);
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
new file mode 100644 (file)
index 0000000..5c80b98
--- /dev/null
@@ -0,0 +1,701 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_H_
+#define _VNET_DEV_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/" /* follows the bus name in a device id */
+
+#define foreach_vnet_dev_port_type /* (bit, name) list of port types */       \
+  _ (0, UNKNOWN)                                                              \
+  _ (1, ETHERNET)
+
+typedef char vnet_dev_device_id_t[32]; /* device id, used as a C string */
+
+typedef enum /* port type; every value is a distinct bit, 1 << bit */
+{
+#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)),
+  foreach_vnet_dev_port_type
+#undef _
+} vnet_dev_port_type_t;
+
+#define foreach_vnet_dev_port_caps /* list of port capability flags */        \
+  _ (interrupt_mode)                                                          \
+  _ (rss)
+
+typedef union /* port capabilities, one bit per listed flag */
+{
+  struct
+  {
+#define _(n) u8 n : 1;
+    foreach_vnet_dev_port_caps
+#undef _
+  };
+  u8 as_number; /* all capability bits as one byte */
+} vnet_dev_port_caps_t;
+
+typedef union /* hardware address storage */
+{
+  u8 eth_mac[6]; /* 6-byte ethernet MAC view */
+  u8 raw[8];     /* whole 8-byte storage */
+} vnet_dev_hw_addr_t;
+
+/* Forward declarations of the types used by the op prototypes and the
+ * structures declared further down; each typedef appears exactly once so
+ * the header does not depend on C11 typedef redefinition. */
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+typedef struct vnet_dev_counter vnet_dev_counter_t;
+typedef struct vnet_dev_counter_main vnet_dev_counter_main_t;
+typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t;
+
+typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *); /*
+   Operation signatures: the *_op_t variants return vnet_dev_rv_t, the
+   *_op_no_rv_t variants return nothing. */
+typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) (
+  vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *);
+typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *,
+                                               vnet_dev_rx_queue_t *);
+typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *,
+                                               vnet_dev_tx_queue_t *);
+typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *);
+typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *,
+                                            vnet_dev_rx_queue_t *);
+typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *,
+                                            vnet_dev_tx_queue_t *);
+
+typedef u16 vnet_dev_queue_id_t;
+typedef u16 vnet_dev_bus_index_t;    /* index into vnet_dev_main.buses */
+typedef u16 vnet_dev_driver_index_t; /* index into vnet_dev_main.drivers */
+
+typedef struct /* operations on an RX queue; stop and free return nothing */
+{
+  vnet_dev_rx_queue_op_t *alloc;
+  vnet_dev_rx_queue_op_t *start;
+  vnet_dev_rx_queue_op_no_rv_t *stop;
+  vnet_dev_rx_queue_op_no_rv_t *free;
+} vnet_dev_rx_queue_ops_t;
+
+typedef struct /* operations on a TX queue; stop and free return nothing */
+{
+  vnet_dev_tx_queue_op_t *alloc;
+  vnet_dev_tx_queue_op_t *start;
+  vnet_dev_tx_queue_op_no_rv_t *stop;
+  vnet_dev_tx_queue_op_no_rv_t *free;
+} vnet_dev_tx_queue_ops_t;
+
+typedef struct /* queue sizing parameters */
+{
+  u16 data_size; /* presumably size of per-queue private data - TODO confirm */
+  u16 min_size;
+  u16 max_size;
+  u16 default_size;
+  u8 multiplier; /* presumably sizes must be a multiple of it - TODO confirm */
+  u8 size_is_power_of_two : 1;
+} vnet_dev_queue_config_t;
+
+#define foreach_vnet_dev_port_cfg_type /* kinds of port config change */      \
+  _ (PROMISC_MODE)                                                            \
+  _ (MAX_FRAME_SIZE)                                                          \
+  _ (CHANGE_PRIMARY_HW_ADDR)                                                  \
+  _ (ADD_SECONDARY_HW_ADDR)                                                   \
+  _ (REMOVE_SECONDARY_HW_ADDR)
+
+typedef enum /* VNET_DEV_PORT_CFG_<kind>, with UNKNOWN as value 0 */
+{
+  VNET_DEV_PORT_CFG_UNKNOWN,
+#define _(n) VNET_DEV_PORT_CFG_##n,
+  foreach_vnet_dev_port_cfg_type
+#undef _
+} __clib_packed vnet_dev_port_cfg_type_t;
+
+typedef struct vnet_dev_port_cfg_change_req /* one port config change */
+{
+  vnet_dev_port_cfg_type_t type;
+  u8 validated : 1;
+
+  union /* presumably the member used is selected by 'type' - TODO confirm */
+  {
+    u8 promisc : 1;
+    vnet_dev_hw_addr_t addr;
+    u16 max_frame_size;
+  };
+
+} vnet_dev_port_cfg_change_req_t;
+
+typedef struct /* attributes of a port */
+{
+  vnet_dev_hw_addr_t hw_addr;
+  u16 max_rx_queues;
+  u16 max_tx_queues;
+  u16 max_supported_frame_size;
+  vnet_dev_port_type_t type;
+  vnet_dev_port_caps_t caps;
+} vnet_dev_port_attr_t;
+
+typedef enum /* what a periodic operation acts on */
+{
+  VNET_DEV_PERIODIC_OP_TYPE_DEV = 1,
+  VNET_DEV_PERIODIC_OP_TYPE_PORT = 2,
+} __clib_packed vnet_dev_periodic_op_type_t;
+
+typedef struct /* a periodic operation on a device or a port */
+{
+  f64 interval;
+  f64 last_run;
+  vnet_dev_periodic_op_type_t type;
+  union /* target of the operation; 'arg' is the untyped view */
+  {
+    vnet_dev_t *dev;
+    vnet_dev_port_t *port;
+    void *arg;
+  };
+  union /* function to call; 'op' is the untyped view */
+  {
+    vnet_dev_op_no_rv_t *dev_op;
+    vnet_dev_port_op_no_rv_t *port_op;
+    void *op;
+  };
+} vnet_dev_periodic_op_t;
+
+typedef struct /* description of an RX or TX node */
+{
+  struct _vlib_node_fn_registration *registrations;
+  format_function_t *format_trace;
+  vlib_error_desc_t *error_counters;
+  u16 n_error_counters; /* presumably length of error_counters - TODO confirm */
+} vnet_dev_node_t;
+
+typedef struct /* device level operations of a driver */
+{
+  vnet_dev_op_t *alloc;
+  vnet_dev_op_t *init;
+  vnet_dev_op_no_rv_t *deinit; /* checked for NULL before use in dev.c */
+  vnet_dev_op_t *reset;        /* NULL: reset is reported as not supported */
+  vnet_dev_op_no_rv_t *free;   /* checked for NULL before use in dev.c */
+  u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *);
+  format_function_t *format_info;
+} vnet_dev_ops_t;
+
+typedef struct /* port level operations of a driver */
+{
+  vnet_dev_port_op_t *alloc;
+  vnet_dev_port_op_t *init;
+  vnet_dev_port_cfg_change_op_t *config_change;
+  vnet_dev_port_cfg_change_op_t *config_change_validate;
+  vnet_dev_port_op_t *start;
+  vnet_dev_port_op_no_rv_t *stop;
+  vnet_dev_port_op_no_rv_t *deinit;
+  vnet_dev_port_op_no_rv_t *free;
+  format_function_t *format_status;
+} vnet_dev_port_ops_t;
+
+typedef union /* set of runtime update requests for an RX queue */
+{
+  struct
+  {
+    u8 update_next_index : 1;
+    u8 update_feature_arc : 1;
+    u8 suspend_off : 1;
+    u8 suspend_on : 1;
+  };
+  u8 as_number; /* all request bits as one byte */
+} vnet_dev_rx_queue_rt_req_t;
+
+typedef struct vnet_dev_rx_queue /* RX queue; exactly 3 cache lines + data */
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  vnet_dev_port_t *port; /* presumably the owning port - TODO confirm */
+  u16 rx_thread_index;
+  u16 index;
+  vnet_dev_counter_main_t *counter_main;
+  CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+  u8 enabled : 1;
+  u8 started : 1;
+  u8 suspended : 1;
+  vnet_dev_queue_id_t queue_id;
+  u16 size;
+  u16 next_index;
+  vnet_dev_rx_queue_rt_req_t runtime_request;
+  CLIB_CACHE_LINE_ALIGN_MARK (runtime1);
+  vlib_buffer_template_t buffer_template;
+  CLIB_ALIGN_MARK (private_data, 16);
+  u8 data[]; /* 16-byte aligned trailing storage */
+} vnet_dev_rx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_tx_queue /* TX queue; exactly 2 cache lines + data */
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  vnet_dev_port_t *port; /* presumably the owning port - TODO confirm */
+  clib_bitmap_t *assigned_threads;
+  u16 index;
+  vnet_dev_counter_main_t *counter_main;
+  CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+  vnet_dev_queue_id_t queue_id;
+  u8 started : 1;
+  u8 enabled : 1;
+  u8 lock_needed : 1;
+  u8 lock;
+  u16 size;
+  CLIB_ALIGN_MARK (private_data, 16);
+  u8 data[]; /* 16-byte aligned trailing storage */
+} vnet_dev_tx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_port /* a port of a device */
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  vnet_dev_t *dev; /* device used for logging in vnet_dev_feature_update_cb */
+  vnet_dev_port_id_t port_id;
+  vnet_dev_driver_index_t driver_index;
+  u8 initialized : 1;
+  u8 started : 1; /* tested by vnet_dev_admin_up_down_fn () */
+  u8 link_up : 1;
+  u8 promisc : 1;
+  u8 interface_created : 1; /* must be 0 when the device is deinitialized */
+  u8 rx_node_assigned : 1;
+  vnet_dev_counter_main_t *counter_main;
+  vnet_dev_queue_config_t rx_queue_config;
+  vnet_dev_queue_config_t tx_queue_config;
+  vnet_dev_port_attr_t attr;
+  u32 max_frame_size;
+  vnet_dev_hw_addr_t primary_hw_addr;
+  vnet_dev_hw_addr_t *secondary_hw_addr;
+  u32 index;
+  u32 speed;
+  vnet_dev_rx_queue_t **rx_queues;
+  vnet_dev_tx_queue_t **tx_queues;
+  vnet_dev_port_ops_t port_ops;
+  vnet_dev_rx_queue_ops_t rx_queue_ops;
+  vnet_dev_tx_queue_ops_t tx_queue_ops;
+  vnet_dev_node_t rx_node;
+  vnet_dev_node_t tx_node;
+
+  struct /* state of the interface created for the port */
+  {
+    vnet_dev_if_name_t name;
+    u32 dev_instance;
+    u32 rx_node_index;
+    u32 current_config_index; /* feature arc config index, 0 when arc is off */
+    u16 rx_next_index;
+    u16 redirect_to_node_next_index; /* used when redirect_to_node is set */
+    u8 feature_arc_index;
+    u8 feature_arc : 1; /* set while features are enabled on the RX arc */
+    u8 redirect_to_node : 1;
+    u32 tx_node_index;
+    u32 hw_if_index;
+    u32 sw_if_index;
+    u16 num_rx_queues;
+    u16 num_tx_queues;
+    u16 txq_sz;
+    u16 rxq_sz;
+  } intf;
+
+  CLIB_CACHE_LINE_ALIGN_MARK (data0);
+  u8 data[]; /* cache line aligned trailing storage */
+} vnet_dev_port_t;
+
+typedef struct vnet_dev /* a device */
+{
+  vnet_dev_device_id_t device_id; /* key in vnet_dev_main.device_index_by_id */
+  u16 initialized : 1;    /* set by vnet_dev_init, cleared by vnet_dev_deinit */
+  u16 not_first_init : 1; /* set by vnet_dev_init () */
+  u16 va_dma : 1;
+  u16 process_node_quit : 1;
+  u16 process_node_periodic : 1;
+  u16 poll_stats : 1;
+  u16 bus_index; /* index into vnet_dev_main.buses */
+  u8 numa_node;
+  u16 max_rx_queues;
+  u16 max_tx_queues;
+  vnet_dev_driver_index_t driver_index;
+  u32 index; /* index into vnet_dev_main.devices */
+  u32 process_node_index;
+  u8 bus_data[32] __clib_aligned (16); /* a bus' device_data_size must fit */
+  vnet_dev_ops_t ops; /* copied from the driver registration at alloc */
+  vnet_dev_port_t **ports;
+  vnet_dev_periodic_op_t *periodic_ops;
+  u8 *description;
+  u8 __clib_aligned (16)
+  data[]; /* trailing storage; device_data_sz is passed at allocation */
+} vnet_dev_t;
+
+typedef struct /* one device match entry of a driver */
+{
+  u16 vendor_id, device_id;
+  char *description;
+} vnet_dev_match_t;
+
+#define VNET_DEV_MATCH(...) /* match array ending with an empty entry */      \
+  (vnet_dev_match_t[])                                                        \
+  {                                                                           \
+    __VA_ARGS__, {}                                                           \
+  }
+
+typedef struct /* operations provided by a bus */
+{
+  vnet_dev_op_t *device_open;
+  vnet_dev_op_no_rv_t *device_close; /* checked for NULL before use */
+  vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32,
+                                    void **); /* (vm, dev, size, align, pp) */
+  void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *);
+  void *(*get_device_info) (vlib_main_t *, char *); /* (vm, device id) */
+  void (*free_device_info) (vlib_main_t *, void *);
+  format_function_t *format_device_info;
+  format_function_t *format_device_addr;
+} vnet_dev_bus_ops_t;
+
+/* Bus registration record. Objects of this type are declared with
+ * VNET_DEV_REGISTER_BUS (), whose constructor pushes them onto the
+ * vnet_dev_main.bus_registrations list through `next_registration`. */
+struct vnet_dev_bus_registration
+{
+  /* next element of the singly linked registration list */
+  vnet_dev_bus_registration_t *next_registration;
+  /* printed with "%s" by format_vnet_dev_info () */
+  vnet_dev_driver_name_t name;
+  u16 device_data_size;
+  vnet_dev_bus_ops_t ops;
+};
+
+/* Driver registration record. Objects of this type are declared with
+ * VNET_DEV_REGISTER_DRIVER (), whose constructor pushes them onto the
+ * vnet_dev_main.driver_registrations list through `next_registration`. */
+struct vnet_dev_driver_registration
+{
+  /* next element of the singly linked registration list */
+  vnet_dev_driver_registration_t *next_registration;
+  u8 bus_master_enable : 1;
+  /* printed with "%s" by format_vnet_dev_info () */
+  vnet_dev_driver_name_t name;
+  vnet_dev_bus_name_t bus;
+  u16 device_data_sz;
+  u16 runtime_temp_space_sz;
+  /* match table; presumably built with VNET_DEV_MATCH () -- confirm in
+   * the drivers */
+  vnet_dev_match_t *match;
+  int priority;
+  vnet_dev_ops_t ops;
+};
+
+/* Element of the vnet_dev_main.buses pool (looked up by
+ * vnet_dev_get_bus ()). */
+typedef struct
+{
+  u32 index;
+  /* registration record this bus was created from (its `name` is printed
+   * by format_vnet_dev_info ()) */
+  vnet_dev_bus_registration_t *registration;
+  vnet_dev_bus_ops_t ops;
+} vnet_dev_bus_t;
+
+/* Element of the vnet_dev_main.drivers pool (format_vnet_dev_info () looks
+ * one up by vnet_dev_t.driver_index). */
+typedef struct
+{
+  u32 index;
+  void *dev_data;
+  /* registration record of this driver (its `name` is printed by
+   * format_vnet_dev_info ()) */
+  vnet_dev_driver_registration_t *registration;
+  u32 dev_class_index;
+  vnet_dev_bus_index_t bus_index;
+  vnet_dev_ops_t ops;
+} vnet_dev_driver_t;
+
+/* Global state of the device infrastructure; the single instance is
+ * `vnet_dev_main`. */
+typedef struct
+{
+  /* pool of buses, indexed by vnet_dev_t.bus_index */
+  vnet_dev_bus_t *buses;
+  /* pool of drivers, indexed by vnet_dev_t.driver_index */
+  vnet_dev_driver_t *drivers;
+  /* pool of device pointers, see vnet_dev_get_by_index () */
+  vnet_dev_t **devices;
+  /* pool of port pointers indexed by dev_instance, see
+   * vnet_dev_get_port_from_dev_instance () */
+  vnet_dev_port_t **ports_by_dev_instance;
+  /* list heads filled by the VNET_DEV_REGISTER_BUS () /
+   * VNET_DEV_REGISTER_DRIVER () constructors */
+  vnet_dev_bus_registration_t *bus_registrations;
+  vnet_dev_driver_registration_t *driver_registrations;
+  /* vnet_dev_get_rt_temp_space () returns
+   * runtime_temp_spaces + (thread_index << log2_runtime_temp_space_sz) */
+  void *runtime_temp_spaces;
+  u32 log2_runtime_temp_space_sz;
+  u32 *free_process_node_indices;
+  u32 *free_rx_node_indices;
+  /* hash: device id -> index into `devices`, see vnet_dev_by_id () */
+  uword *device_index_by_id;
+
+  u8 *startup_config;
+  u16 next_rx_queue_thread;
+  u8 eth_port_rx_feature_arc_index;
+} vnet_dev_main_t;
+
+extern vnet_dev_main_t vnet_dev_main;
+
+/* Argument bundle taken (by pointer) by vnet_dev_port_add (). It groups
+ * port-level settings, the rx/tx node descriptors and the per-direction
+ * queue configuration and ops. */
+typedef struct
+{
+  struct
+  {
+    vnet_dev_port_attr_t attr;
+    vnet_dev_port_ops_t ops;
+    /* NOTE(review): presumably the size of the trailing vnet_dev_port_t.data
+     * area and the bytes used to initialize it -- confirm in dev.c */
+    u16 data_size;
+    void *initial_data;
+  } port;
+
+  vnet_dev_node_t *rx_node;
+  vnet_dev_node_t *tx_node;
+
+  struct
+  {
+    vnet_dev_queue_config_t config;
+    vnet_dev_rx_queue_ops_t ops;
+  } rx_queue;
+
+  struct
+  {
+    vnet_dev_queue_config_t config;
+    vnet_dev_tx_queue_ops_t ops;
+  } tx_queue;
+} vnet_dev_port_add_args_t;
+
+/* State-change report passed by value to vnet_dev_port_state_change (). */
+typedef struct
+{
+  /* one bit per kind of change; `any` overlays the same byte so all bits
+   * can be read or written at once */
+  union
+  {
+    struct
+    {
+      u8 link_speed : 1;
+      u8 link_state : 1;
+      u8 link_duplex : 1;
+    };
+    u8 any;
+  } change;
+  /* values that go with the change bits above (presumably only meaningful
+   * when the matching bit is set -- confirm in port.c) */
+  u8 link_state : 1;
+  u8 full_duplex : 1;
+  u32 link_speed;
+} vnet_dev_port_state_changes_t;
+
+/* dev.c -- prototypes only, the definitions are not part of this header.
+ * vnet_dev_dma_mem_alloc () / vnet_dev_dma_mem_free () have the same
+ * argument lists as the dma_mem_alloc_fn / dma_mem_free_fn bus ops. */
+vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t,
+                           vnet_dev_driver_t *);
+void vnet_dev_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_detach (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *,
+                                vnet_dev_port_id_t,
+                                vnet_dev_port_add_args_t *);
+vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32,
+                                     void **);
+void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *);
+vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t);
+void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t);
+
+/* error.c -- vnet_dev_port_err () returns 0 when the vnet_dev_rv_t argument
+ * is VNET_DEV_OK, otherwise a clib error built from the device id, port id,
+ * the formatted return value and the caller's format string + arguments. */
+clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *,
+                                vnet_dev_rv_t, char *, ...);
+
+/* handlers.c -- prototypes only. NOTE(review): going by the signatures these
+ * look like the callbacks plugged into the vnet interface / device class
+ * layer -- confirm in port.c and handlers.c. */
+clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *,
+                                               vnet_hw_interface_t *, u32);
+u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32);
+clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *,
+                                       const u8 *);
+clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *,
+                                           u8);
+int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32,
+                         uword *);
+clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *,
+                                                vnet_hw_interface_t *,
+                                                clib_bitmap_t *);
+void vnet_dev_clear_hw_interface_counters (u32);
+clib_error_t *vnet_dev_rx_mode_change_fn (vnet_main_t *, u32, u32,
+                                         vnet_hw_if_rx_mode);
+void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32);
+
+/* port.c -- prototypes only. vnet_dev_port_state_change () takes its
+ * vnet_dev_port_state_changes_t argument by value. */
+vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *,
+                                                vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *,
+                                                vnet_dev_port_t *);
+void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *,
+                                vnet_dev_counter_t *, u16);
+void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *,
+                                vnet_dev_port_state_changes_t);
+void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *,
+                                      vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+                                       vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *);
+
+/* queue.c -- prototypes only; every rx queue function has a tx queue
+ * counterpart with the same shape. */
+vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *,
+                                    vnet_dev_counter_t *, u16);
+void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *,
+                                    vnet_dev_counter_t *, u16);
+void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* process.c -- prototypes only. The functions come in device and port
+ * flavours; each call_op* variant takes the op to run as a function
+ * pointer. Parameters are left unnamed, as everywhere else in this
+ * header. */
+vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *,
+                                       vnet_dev_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *,
+                                             vnet_dev_op_no_rv_t *);
+void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *,
+                                      vnet_dev_op_no_rv_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *,
+                                            vnet_dev_port_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *,
+                                                  vnet_dev_port_t *,
+                                                  vnet_dev_port_op_no_rv_t *);
+void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *,
+                                           vnet_dev_port_op_no_rv_t *);
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *,
+                                     vnet_dev_port_cfg_change_req_t *);
+void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64,
+                           vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *,
+                              vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64,
+                            vnet_dev_port_op_no_rv_t *);
+void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
+                               vnet_dev_port_op_no_rv_t *);
+
+/* runtime.c -- types describing one runtime operation and the function
+ * that takes an array of them (vnet_dev_rt_exec_ops ()). */
+
+/* Kind of object an operation targets; only rx queues are defined. */
+typedef enum
+{
+  VNET_DEV_RT_OP_TYPE_UNKNOWN,
+  VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+} __clib_packed vnet_dev_rt_op_type_t;
+
+/* What to do with the targeted object. */
+typedef enum
+{
+  VNET_DEV_RT_OP_ACTION_UNKNOWN,
+  VNET_DEV_RT_OP_ACTION_START,
+  VNET_DEV_RT_OP_ACTION_STOP,
+} __clib_packed vnet_dev_rt_op_action_t;
+
+/* One runtime operation. `type` and `action` are 4-bit fields; presumably
+ * they hold vnet_dev_rt_op_type_t / vnet_dev_rt_op_action_t values --
+ * confirm in runtime.c. */
+typedef struct
+{
+  u16 thread_index;
+  u8 type : 4;
+  u8 action : 4;
+  u8 completed;
+  vnet_dev_rx_queue_t *rx_queue;
+} vnet_dev_rt_op_t;
+
+/* NOTE(review): the trailing u32 is presumably the number of elements in
+ * the vnet_dev_rt_op_t array -- confirm in runtime.c. */
+vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *,
+                                   vnet_dev_rt_op_t *, u32);
+
+/* format.c -- formatting options and the format/unformat functions. */
+
+/* Option flags; format_vnet_dev_info () takes a pointer to this structure
+ * as its first vararg, followed by the vnet_dev_t pointer. */
+typedef struct
+{
+  u8 counters : 1;
+  u8 show_zero_counters : 1;
+  u8 debug : 1;
+} vnet_dev_format_args_t;
+
+format_function_t format_vnet_dev_addr;
+format_function_t format_vnet_dev_hw_addr;
+format_function_t format_vnet_dev_info;
+format_function_t format_vnet_dev_interface_info;
+format_function_t format_vnet_dev_interface_name;
+format_function_t format_vnet_dev_port_info;
+format_function_t format_vnet_dev_rv;
+format_function_t format_vnet_dev_rx_queue_info;
+format_function_t format_vnet_dev_tx_queue_info;
+format_function_t format_vnet_dev_flags;
+format_function_t format_vnet_dev_port_flags;
+unformat_function_t unformat_vnet_dev_flags;
+unformat_function_t unformat_vnet_dev_port_flags;
+
+/* Layout of an rx node's runtime_data as seen through
+ * vnet_dev_get_rx_node_runtime (). foreach_vnet_dev_rx_queue_runtime ()
+ * walks the first n_rx_queues entries of rx_queues; the array has room for
+ * 4 queue pointers. */
+typedef struct
+{
+  u8 n_rx_queues;
+  vnet_dev_rx_queue_t *rx_queues[4];
+} vnet_dev_rx_node_runtime_t;
+
+/* the structure is overlaid on vlib_node_runtime_t.runtime_data */
+STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <=
+                VLIB_NODE_RUNTIME_DATA_SIZE,
+              "must fit into runtime data");
+
+/* X-macro list of rx next entries: _ (enum suffix, node name string). */
+#define foreach_vnet_dev_port_rx_next                                         \
+  _ (ETH_INPUT, "ethernet-input")                                             \
+  _ (DROP, "error-drop")
+
+/* One VNET_DEV_ETH_RX_PORT_NEXT_<suffix> value per list entry, followed by
+ * VNET_DEV_ETH_RX_PORT_N_NEXTS which therefore equals the entry count. */
+typedef enum
+{
+#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n,
+  foreach_vnet_dev_port_rx_next
+#undef _
+    VNET_DEV_ETH_RX_PORT_N_NEXTS
+} vnet_dev_eth_port_rx_next_t;
+
+/* defined elsewhere; presumably indexed by port type, per the name */
+extern u16 vnet_dev_default_next_index_by_port_type[];
+extern vlib_node_registration_t port_rx_eth_node;
+
+/* tx nodes use the generic interface output runtime as their runtime_data,
+ * see vnet_dev_get_tx_node_runtime () */
+typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <=
+                VLIB_NODE_RUNTIME_DATA_SIZE,
+              "must fit into runtime data");
+
+/* Declare the bus registration object __vnet_dev_bus_registration_<x> and a
+ * constructor that pushes it onto the head of
+ * vnet_dev_main.bus_registrations. __VA_ARGS__ is placed in front of both
+ * declarations of the object. The expansion ends with a second, unterminated
+ * declaration of the object, so whatever follows the macro invocation
+ * (typically an `= { ... };` initializer) completes it. */
+#define VNET_DEV_REGISTER_BUS(x, ...)                                         \
+  __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x;    \
+  static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void)    \
+  {                                                                           \
+    vnet_dev_main_t *dm = &vnet_dev_main;                                     \
+    __vnet_dev_bus_registration_##x.next_registration =                       \
+      dm->bus_registrations;                                                  \
+    dm->bus_registrations = &__vnet_dev_bus_registration_##x;                 \
+  }                                                                           \
+  __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x
+
+/* Declare the driver registration object __vnet_dev_driver_registration_<x>
+ * and a constructor that pushes it onto the head of
+ * vnet_dev_main.driver_registrations. __VA_ARGS__ is placed in front of both
+ * declarations of the object. The expansion ends with a second, unterminated
+ * declaration of the object, so whatever follows the macro invocation
+ * (typically an `= { ... };` initializer) completes it. */
+#define VNET_DEV_REGISTER_DRIVER(x, ...)                                      \
+  __VA_ARGS__ vnet_dev_driver_registration_t                                  \
+    __vnet_dev_driver_registration_##x;                                       \
+  static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \
+  {                                                                           \
+    vnet_dev_main_t *dm = &vnet_dev_main;                                     \
+    __vnet_dev_driver_registration_##x.next_registration =                    \
+      dm->driver_registrations;                                               \
+    dm->driver_registrations = &__vnet_dev_driver_registration_##x;           \
+  }                                                                           \
+  __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x
+
+/* Open the definition of a node function for the vnet_dev_node_t `node`.
+ *
+ * The macro forward-declares CLIB_MARCH_SFX (<node>_fn), creates a
+ * vlib_node_fn_registration_t pointing at it, and emits a constructor that
+ * records the march variant and links the registration into
+ * (node).registrations. The expansion ends with the function's return type
+ * and name, so the invocation must be followed by the parameter list and
+ * body. */
+#define VNET_DEV_NODE_FN(node)                                                \
+  uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *,     \
+                                   vlib_frame_t *);                          \
+  static vlib_node_fn_registration_t CLIB_MARCH_SFX (                         \
+    node##_fn_registration) = {                                               \
+    .function = &CLIB_MARCH_SFX (node##_fn),                                  \
+  };                                                                          \
+                                                                              \
+  static void __clib_constructor CLIB_MARCH_SFX (                             \
+    node##_fn_multiarch_register) (void)                                      \
+  {                                                                           \
+    extern vnet_dev_node_t node;                                              \
+    vlib_node_fn_registration_t *r;                                           \
+    r = &CLIB_MARCH_SFX (node##_fn_registration);                             \
+    r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE);              \
+    r->next_registration = (node).registrations;                              \
+    (node).registrations = r;                                                 \
+  }                                                                           \
+  uword CLIB_MARCH_SFX (node##_fn)
+
+/* Iterate over the pointer pools hanging off a device or a port:
+ *   foreach_vnet_dev_port (p, d)           - every port `p` of device `d`
+ *   foreach_vnet_dev_port_rx_queue (q, p)  - every rx queue `q` of port `p`
+ *   foreach_vnet_dev_port_tx_queue (q, p)  - every tx queue `q` of port `p`
+ * The container argument is parenthesized so that any pointer expression
+ * (not just a plain identifier) can be passed. */
+#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, (d)->ports)
+#define foreach_vnet_dev_port_rx_queue(q, p)                                  \
+  pool_foreach_pointer (q, (p)->rx_queues)
+#define foreach_vnet_dev_port_tx_queue(q, p)                                  \
+  pool_foreach_pointer (q, (p)->tx_queues)
+
+#include <vnet/dev/dev_funcs.h>
+
+#endif /* _VNET_DEV_H_ */
diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h
new file mode 100644 (file)
index 0000000..892cef4
--- /dev/null
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_FUNCS_H_
+#define _VNET_DEV_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+
+/* Return the trailing data area embedded at the end of the device. */
+static_always_inline void *
+vnet_dev_get_data (vnet_dev_t *dev)
+{
+  void *dev_data = dev->data;
+
+  return dev_data;
+}
+
+/* Inverse of vnet_dev_get_data (): step back from the data area to the
+ * vnet_dev_t that contains it. */
+static_always_inline vnet_dev_t *
+vnet_dev_from_data (void *p)
+{
+  u8 *data_start = p;
+
+  return (void *) (data_start - STRUCT_OFFSET_OF (vnet_dev_t, data));
+}
+
+/* Return the trailing data area embedded at the end of the port. */
+static_always_inline void *
+vnet_dev_get_port_data (vnet_dev_port_t *port)
+{
+  void *port_data = port->data;
+
+  return port_data;
+}
+
+/* Return the `data` member of the rx queue as an untyped pointer. */
+static_always_inline void *
+vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq)
+{
+  void *rxq_data = rxq->data;
+
+  return rxq_data;
+}
+
+/* Return the `data` member of the tx queue as an untyped pointer. */
+static_always_inline void *
+vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq)
+{
+  void *txq_data = txq->data;
+
+  return txq_data;
+}
+
+/* Look up a device by its index in the vnet_dev_main.devices pool. */
+static_always_inline vnet_dev_t *
+vnet_dev_get_by_index (u32 index)
+{
+  vnet_dev_t **slot = pool_elt_at_index (vnet_dev_main.devices, index);
+
+  return *slot;
+}
+
+/* Look up a port by its index in the device's port pool. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index)
+{
+  vnet_dev_port_t **slot = pool_elt_at_index (dev->ports, index);
+
+  return *slot;
+}
+
+/* Map a dev_instance to its port; returns 0 when that pool index is free. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_dev_instance (u32 dev_instance)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  if (!pool_is_free_index (dm->ports_by_dev_instance, dev_instance))
+    return *pool_elt_at_index (dm->ports_by_dev_instance, dev_instance);
+
+  return 0;
+}
+
+/* Find a device through the device_index_by_id hash; returns 0 when the id
+ * has no entry. */
+static_always_inline vnet_dev_t *
+vnet_dev_by_id (char *id)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword *dev_index = hash_get (dm->device_index_by_id, id);
+
+  if (dev_index == 0)
+    return 0;
+
+  return pool_elt_at_index (dm->devices, dev_index[0])[0];
+}
+
+/* Address to hand to the device for memory at `p`: the pointer value itself
+ * when the device has va_dma set, otherwise whatever
+ * vlib_physmem_get_pa () returns for it. */
+static_always_inline uword
+vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  if (dev->va_dma)
+    return pointer_to_uword (p);
+
+  return vlib_physmem_get_pa (vm, p);
+}
+
+/* Return the device's bus_data area as an untyped pointer. */
+static_always_inline void *
+vnet_dev_get_bus_data (vnet_dev_t *dev)
+{
+  void *bus_data = dev->bus_data;
+
+  return bus_data;
+}
+
+/* Return the bus the device sits on (vnet_dev_main.buses[dev->bus_index]). */
+static_always_inline vnet_dev_bus_t *
+vnet_dev_get_bus (vnet_dev_t *dev)
+{
+  vnet_dev_bus_t *bus_pool = vnet_dev_main.buses;
+
+  return pool_elt_at_index (bus_pool, dev->bus_index);
+}
+
+/* Sanity check of the calling context: asserts that the currently running
+ * process node is the device's own process node and that the caller runs on
+ * thread 0. Consists of ASSERT ()s only. */
+static_always_inline void
+vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm));
+  ASSERT (vm->thread_index == 0);
+}
+
+/* Port flavour of vnet_dev_validate (): asserts that the currently running
+ * process node is the process node of the port's device and that the caller
+ * runs on thread 0. Consists of ASSERT ()s only. */
+static_always_inline void
+vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  ASSERT (port->dev->process_node_index ==
+         vlib_get_current_process_node_index (vm));
+  ASSERT (vm->thread_index == 0);
+}
+
+/* Return the sw_if_index stored in the port's interface state. */
+static_always_inline u32
+vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port)
+{
+  u32 sw_if_index = port->intf.sw_if_index;
+
+  return sw_if_index;
+}
+
+/* Linear search of the device's ports for the one whose port_id matches;
+ * returns 0 when there is none. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id)
+{
+  foreach_vnet_dev_port (p, dev)
+    {
+      if (p->port_id == port_id)
+       return p;
+    }
+
+  return 0;
+}
+
+/* Allocate a zeroed, cache-line aligned object of `sz` bytes followed by
+ * `data_sz` bytes; the total is rounded up to a multiple of
+ * CLIB_CACHE_LINE_BYTES. */
+static_always_inline void *
+vnet_dev_alloc_with_data (u32 sz, u32 data_sz)
+{
+  u32 total = round_pow2 (sz + data_sz, CLIB_CACHE_LINE_BYTES);
+  void *mem = clib_mem_alloc_aligned (total, CLIB_CACHE_LINE_BYTES);
+
+  clib_memset (mem, 0, total);
+  return mem;
+}
+
+/* Take the tx queue spinlock, but only for queues that have lock_needed
+ * set; for all other queues this is a no-op. Pair with
+ * vnet_dev_tx_queue_unlock_if_needed (). */
+static_always_inline void
+vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+  /* expected value for the compare-exchange: 0 means unlocked */
+  u8 free = 0;
+
+  if (!txq->lock_needed)
+    return;
+
+  /* try to flip lock 0 -> 1 (acquire ordering on success) */
+  while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0,
+                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+    {
+      /* lock is held: spin on plain relaxed loads until it reads 0 before
+       * attempting the compare-exchange again */
+      while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED))
+       CLIB_PAUSE ();
+      /* a failed compare-exchange stored the observed value in `free`;
+       * reset the expected value */
+      free = 0;
+    }
+}
+
+/* Release the tx queue spinlock (store 0 with release ordering); does
+ * nothing for queues that do not have lock_needed set. */
+static_always_inline void
+vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+  if (txq->lock_needed)
+    __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE);
+}
+
+/* Return the buffer pool index recorded in the rx queue's buffer
+ * template. */
+static_always_inline u8
+vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq)
+{
+  vlib_buffer_template_t *bt = &rxq->buffer_template;
+
+  return bt->buffer_pool_index;
+}
+
+/* Post a runtime request to an rx queue: the bits of `req` are atomically
+ * OR-ed (release ordering) into rxq->runtime_request, so requests accumulate
+ * until vnet_dev_rx_queue_runtime_update () consumes them. `vm` is not
+ * used. */
+static_always_inline void
+vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+                             vnet_dev_rx_queue_rt_req_t req)
+{
+  __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number,
+                    __ATOMIC_RELEASE);
+}
+
+/* View the node's runtime_data as a vnet_dev_rx_node_runtime_t. */
+static_always_inline vnet_dev_rx_node_runtime_t *
+vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node)
+{
+  void *rt = node->runtime_data;
+
+  return rt;
+}
+
+/* View the node's runtime_data as a vnet_dev_tx_node_runtime_t. */
+static_always_inline vnet_dev_tx_node_runtime_t *
+vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node)
+{
+  void *rt = node->runtime_data;
+
+  return rt;
+}
+
+/* Helper for foreach_vnet_dev_rx_queue_runtime (): return the rx_queues
+ * array stored in the node's runtime data. */
+static_always_inline vnet_dev_rx_queue_t **
+foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node)
+{
+  return vnet_dev_get_rx_node_runtime (node)->rx_queues;
+}
+
+/* Consume and apply the runtime requests pending on an rx queue (posted with
+ * vnet_dev_rx_queue_rt_request ()).
+ *
+ * Returns 0 only when a suspend_on request was consumed by this call and 1
+ * in every other case, including when nothing was pending.
+ * foreach_vnet_dev_rx_queue_runtime () uses the result as its filter, so the
+ * loop body is skipped for a queue on the pass that suspends it. */
+static_always_inline int
+vnet_dev_rx_queue_runtime_update (vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *port;
+  vnet_dev_rx_queue_rt_req_t req;
+  int rv = 1;
+
+  /* common case: nothing requested (plain, non-atomic read) */
+  if (PREDICT_TRUE (rxq->runtime_request.as_number == 0))
+    return 1;
+
+  /* fetch all pending request bits and clear them in one atomic step */
+  req.as_number =
+    __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE);
+
+  port = rxq->port;
+  if (req.update_next_index)
+    rxq->next_index = port->intf.rx_next_index;
+
+  if (req.update_feature_arc)
+    {
+      /* refresh the feature arc state carried in the buffer template from
+       * the port's interface state */
+      vlib_buffer_template_t *bt = &rxq->buffer_template;
+      bt->current_config_index = port->intf.current_config_index;
+      vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index;
+    }
+
+  if (req.suspend_on)
+    {
+      rxq->suspended = 1;
+      rv = 0;
+    }
+
+  /* handled after suspend_on: if both bits were set the queue ends up not
+   * suspended while this call still returns 0 */
+  if (req.suspend_off)
+    rxq->suspended = 0;
+
+  return rv;
+}
+
+/* Return the calling thread's slice of runtime_temp_spaces: slices are
+ * (1 << log2_runtime_temp_space_sz) bytes apart and selected by
+ * vm->thread_index. */
+static_always_inline void *
+vnet_dev_get_rt_temp_space (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword offset = (uword) vm->thread_index << dm->log2_runtime_temp_space_sz;
+
+  return dm->runtime_temp_spaces + offset;
+}
+
+/* Set *addr to the given ethernet MAC address; every byte of the hw address
+ * outside eth_mac ends up zero. The value is assembled in a local first, so
+ * the source bytes are read before *addr is written. */
+static_always_inline void
+vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr)
+{
+  vnet_dev_hw_addr_t tmp = {};
+
+  clib_memcpy_fast (&tmp.eth_mac, eth_mac_addr, sizeof (tmp.eth_mac));
+  addr[0] = tmp;
+}
+
+/* Iterate over the rx queues assigned to an rx node.
+ *
+ * Declares `q` (vnet_dev_rx_queue_t *) and runs the loop body for each of
+ * the first n_rx_queues entries of the node's runtime rx_queues array for
+ * which vnet_dev_rx_queue_runtime_update () returns non-zero; pending
+ * runtime requests are therefore applied as a side effect of iterating.
+ *
+ * `q` is loaded inside the loop condition, after the bounds check, so no
+ * slot at or beyond n_rx_queues is ever read (neither when the count is 0
+ * nor after the last iteration). */
+#define foreach_vnet_dev_rx_queue_runtime(q, node)                            \
+  for (vnet_dev_rx_queue_t *                                                  \
+        *__qp = foreach_vnet_dev_rx_queue_runtime_helper (node),             \
+       **__last = __qp + (vnet_dev_get_rx_node_runtime (node))->n_rx_queues, \
+       *(q) = 0;                                                             \
+       __qp < __last && ((q) = *__qp, 1); __qp++)                             \
+    if (vnet_dev_rx_queue_runtime_update (q))
+
+#endif /* _VNET_DEV_FUNCS_H_ */
diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c
new file mode 100644 (file)
index 0000000..df9c6d3
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+
+/* Turn a vnet_dev_rv_t into a clib error for a port.
+ *
+ * Returns 0 when rv is VNET_DEV_OK. Otherwise the printf-style `fmt` and its
+ * arguments are formatted into a temporary vector and the returned error
+ * reads "<device id> port <port id>: <rv text> (<formatted message>)".
+ * `vm` is not used. */
+clib_error_t *
+vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv,
+                  char *fmt, ...)
+{
+  clib_error_t *result = 0;
+  u8 *detail;
+  va_list ap;
+
+  if (rv != VNET_DEV_OK)
+    {
+      va_start (ap, fmt);
+      detail = va_format (0, fmt, &ap);
+      va_end (ap);
+
+      result =
+       clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id,
+                          port->port_id, format_vnet_dev_rv, rv, detail);
+      /* the message was copied into the error, drop the temporary */
+      vec_free (detail);
+    }
+
+  return result;
+}
diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h
new file mode 100644 (file)
index 0000000..2256e1e
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ERRORS_H_
+#define _VNET_DEV_ERRORS_H_
+
+/* X-macro list of error return values: _ (NAME, "description").
+ * format_vnet_dev_rv () expands it into a table of description strings
+ * indexed by -VNET_DEV_ERR_<NAME>. */
+#define foreach_vnet_dev_rv_type                                              \
+  _ (ALREADY_EXISTS, "already exists")                                        \
+  _ (ALREADY_IN_USE, "already in use")                                        \
+  _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure")                   \
+  _ (BUG, "bug")                                                              \
+  _ (BUS, "bus error")                                                        \
+  _ (DEVICE_NO_REPLY, "no reply from device")                                 \
+  _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error")                       \
+  _ (DRIVER_NOT_AVAILABLE, "driver not available")                            \
+  _ (INVALID_BUS, "invalid bus")                                              \
+  _ (INVALID_DATA, "invalid data")                                            \
+  _ (INVALID_DEVICE_ID, "invalid device id")                                  \
+  _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues")                    \
+  _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues")                    \
+  _ (INVALID_PORT_ID, "invalid port id")                                      \
+  _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size")                          \
+  _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size")                          \
+  _ (INVALID_VALUE, "invalid value")                                          \
+  _ (INTERNAL, "internal error")                                              \
+  _ (NOT_FOUND, "not found")                                                  \
+  _ (NOT_READY, "not ready")                                                  \
+  _ (NOT_SUPPORTED, "not supported")                                          \
+  _ (NO_CHANGE, "no change")                                                  \
+  _ (NO_AVAIL_QUEUES, "no queues available")                                  \
+  _ (NO_SUCH_ENTRY, "no such entry")                                          \
+  _ (PORT_STARTED, "port started")                                            \
+  _ (PROCESS_REPLY, "dev process reply error")                                \
+  _ (RESOURCE_NOT_AVAILABLE, "resource not available")                        \
+  _ (TIMEOUT, "timeout")                                                      \
+  _ (UNKNOWN_INTERFACE, "unknown interface")                                  \
+  _ (UNSUPPORTED_CONFIG, "unsupported config")                                \
+  _ (UNSUPPORTED_DEVICE, "unsupported device")                                \
+  _ (UNSUPPORTED_DEVICE_VER, "unsupported device version")
+
+#endif /* _VNET_DEV_ERRORS_H_ */
diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c
new file mode 100644 (file)
index 0000000..4e1ece6
--- /dev/null
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/dev/counters.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Format a vnet_dev_rv_t return value as its human-readable description.
+ * Values outside of the description table are printed numerically.
+ */
+u8 *
+format_vnet_dev_rv (u8 *s, va_list *args)
+{
+  /* description table indexed by the negated return value */
+  char *descriptions[] = {
+    [0] = "OK",
+#define _(n, d) [-VNET_DEV_ERR_##n] = d,
+    foreach_vnet_dev_rv_type
+#undef _
+  };
+  vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t);
+  u32 slot = -rv;
+
+  if (slot < ARRAY_LEN (descriptions))
+    return format (s, "%s", descriptions[slot]);
+
+  return format (s, "unknown return value (%d)", rv);
+}
+
+/* Format the bus-specific address of a device.
+ *
+ * Takes a single vnet_dev_t pointer argument and delegates to the
+ * format_device_addr op of the bus the device is attached to. A null
+ * device appends nothing.
+ */
+u8 *
+format_vnet_dev_addr (u8 *s, va_list *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_t *bus;
+
+  /* hand the vector back untouched; returning 0 here would make the
+   * caller lose (and leak) everything formatted so far */
+  if (dev == 0)
+    return s;
+
+  bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  s = format (s, "%U", bus->ops.format_device_addr, dev);
+
+  return s;
+}
+
+/* Format the interface name of the port selected by a dev_instance (u32). */
+u8 *
+format_vnet_dev_interface_name (u8 *s, va_list *args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+
+  s = format (s, "%s", p->intf.name);
+  return s;
+}
+
+/* Format a multi-line description of a device.
+ *
+ * Arguments: vnet_dev_format_args_t *, vnet_dev_t *. Prints the driver and
+ * bus names, the optional description, the bus-specific info (when the bus
+ * provides a formatter), the name of the assigned process node and the
+ * driver-specific info (when the device provides a formatter).
+ * Continuation lines are indented to the column where formatting started.
+ */
+u8 *
+format_vnet_dev_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *fa = va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_driver_t *drv = pool_elt_at_index (dm->drivers, dev->driver_index);
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  u32 col = format_get_indent (s);
+
+  s = format (s, "Driver is '%s', bus is '%s'", drv->registration->name,
+              bus->registration->name);
+
+  if (dev->description)
+    {
+      s = format (s, ", description is '%v'", dev->description);
+    }
+
+  if (bus->ops.format_device_info)
+    {
+      s = format (s, "\n%U%U", format_white_space, col,
+                  bus->ops.format_device_info, fa, dev);
+    }
+
+  s = format (s, "\n%UAssigned process node is '%U'", format_white_space, col,
+              format_vlib_node_name, vm, dev->process_node_index);
+
+  if (dev->ops.format_info == 0)
+    return s;
+
+  return format (s, "\n%U%U", format_white_space, col, dev->ops.format_info,
+                 fa, dev);
+}
+
+/* Format a vnet_dev_hw_addr_t (passed by pointer) as an ethernet address. */
+u8 *
+format_vnet_dev_hw_addr (u8 *s, va_list *args)
+{
+  vnet_dev_hw_addr_t *hw_addr = va_arg (*args, vnet_dev_hw_addr_t *);
+
+  s = format (s, "%U", format_ethernet_address, hw_addr->eth_mac);
+  return s;
+}
+
+/* Format a multi-line description of a port.
+ *
+ * Arguments: vnet_dev_format_args_t *, vnet_dev_port_t *. Prints the
+ * primary hardware address, queue counts, secondary hardware addresses,
+ * frame size limits, the driver-provided status (when a formatter is set)
+ * and the interface assignment.
+ */
+u8 *
+format_vnet_dev_port_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *fa = va_arg (*args, vnet_dev_format_args_t *);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+  u32 col = format_get_indent (s);
+  uword n_secondary;
+
+  s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr,
+              &port->attr.hw_addr);
+  s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)",
+              pool_elts (port->rx_queues), port->attr.max_rx_queues,
+              pool_elts (port->tx_queues), port->attr.max_tx_queues);
+
+  n_secondary = pool_elts (port->secondary_hw_addr);
+  if (n_secondary)
+    {
+      vnet_dev_hw_addr_t *sec;
+      u32 n_printed = 0;
+
+      s = format (s, "\n%USecondary Hardware Address%s:", format_white_space,
+                  col, n_secondary > 1 ? "es are" : " is");
+      pool_foreach (sec, port->secondary_hw_addr)
+        {
+          /* six addresses per output line */
+          if (n_printed % 6 == 0)
+            s = format (s, "\n%U", format_white_space, col + 1);
+          n_printed++;
+          s = format (s, " %U", format_vnet_dev_hw_addr, sec);
+        }
+    }
+
+  s = format (s, "\n%UMax frame size is %u (max supported %u)",
+              format_white_space, col, port->max_frame_size,
+              port->attr.max_supported_frame_size);
+
+  if (port->port_ops.format_status)
+    {
+      s = format (s, "\n%U%U", format_white_space, col,
+                  port->port_ops.format_status, fa, port);
+    }
+
+  s = format (s, "\n%UInterface ", format_white_space, col);
+
+  if (!port->interface_created)
+    return format (s, "not assigned");
+
+  return format (s, "assigned, interface name is '%U', RX node is '%U'",
+                 format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index,
+                 format_vlib_node_name, vm, port->intf.rx_node_index);
+}
+
+/* Format a two-line description of an RX queue.
+ *
+ * Arguments: vnet_dev_format_args_t * (unused), vnet_dev_rx_queue_t *.
+ */
+u8 *
+format_vnet_dev_rx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *fa =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_rx_queue_t *q = va_arg (*args, vnet_dev_rx_queue_t *);
+  u32 col = format_get_indent (s);
+  char *enabled_prefix = q->enabled ? "en" : "dis";
+  char *started_prefix = q->started ? "" : "not-";
+
+  s = format (s, "Size is %u, buffer pool index is %u", q->size,
+              vnet_dev_get_rx_queue_buffer_pool_index (q));
+
+  return format (s, "\n%UPolling thread is %u, %sabled, %sstarted",
+                 format_white_space, col, q->rx_thread_index, enabled_prefix,
+                 started_prefix);
+}
+
+/* Format a two-line description of a TX queue: its size and the list of
+ * threads set in its assigned_threads bitmap.
+ *
+ * Arguments: vnet_dev_format_args_t * (unused), vnet_dev_tx_queue_t *.
+ */
+u8 *
+format_vnet_dev_tx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *fa =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_tx_queue_t *q = va_arg (*args, vnet_dev_tx_queue_t *);
+  u32 col = format_get_indent (s);
+  u32 n_threads;
+
+  s = format (s, "Size is %u", q->size);
+  s = format (s, "\n%U", format_white_space, col);
+
+  n_threads = clib_bitmap_count_set_bits (q->assigned_threads);
+  if (n_threads == 0)
+    return format (s, "Not used by any thread");
+
+  return format (s, "Used by thread%s %U", n_threads > 1 ? "s" : "",
+                 format_bitmap_list, q->assigned_threads);
+}
+
+/* Format the full description of the interface selected by a dev_instance
+ * (u32): the owning device, the port, and every RX and TX queue of the port.
+ * Each nesting level is indented two columns deeper than its parent.
+ */
+u8 *
+format_vnet_dev_interface_info (u8 *s, va_list *args)
+{
+  u32 i = va_arg (*args, u32);
+  /* zero-initialized format args handed to all nested formatters */
+  vnet_dev_format_args_t fa = {}, *a = &fa;
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+  vnet_dev_t *dev = port->dev;
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Device:");
+  s = format (s, "\n%U%U", format_white_space, indent + 2,
+             format_vnet_dev_info, a, dev);
+
+  s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id);
+  s = format (s, "\n%U%U", format_white_space, indent + 2,
+             format_vnet_dev_port_info, a, port);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      s = format (s, "\n%URX queue %u:", format_white_space, indent + 2,
+                 q->queue_id);
+      s = format (s, "\n%U%U", format_white_space, indent + 4,
+                 format_vnet_dev_rx_queue_info, a, q);
+    }
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2,
+                 q->queue_id);
+      s = format (s, "\n%U%U", format_white_space, indent + 4,
+                 format_vnet_dev_tx_queue_info, a, q);
+    }
+  return s;
+}
+
+/* Parse a comma-separated list of flag names into a bitmask.
+ *
+ * Input characters are normalized before lookup: lower case letters are
+ * converted to upper case and '-' is converted to '_', so "no-stats"
+ * matches the name "NO_STATS". Parsing stops at the first character which
+ * is not alphanumeric, '-' or ','; that character is left in the input.
+ *
+ * names / val are parallel arrays with n_flags entries.
+ *
+ * Returns the OR of the values of all listed flags, or 0 when no flag was
+ * given or when any of the listed names is unknown.
+ */
+static u64
+unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags)
+{
+  u64 rv = 0;
+  uword c = 0;
+  u8 *s = 0;
+
+  while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+    {
+      switch (c)
+        {
+        case 'a' ... 'z':
+          c -= 'a' - 'A';
+          /* fallthrough */
+        case '0' ... '9':
+        case 'A' ... 'Z':
+          vec_add1 (s, c);
+          break;
+        case '-':
+          vec_add1 (s, '_');
+          break;
+        case ',':
+          /* terminate current name, next one starts right after */
+          vec_add1 (s, 0);
+          break;
+        default:
+          /* not part of the flag list, give it back to the caller */
+          unformat_put_input (input);
+          goto end_of_string;
+        }
+    }
+end_of_string:
+
+  if (s == 0)
+    return 0;
+
+  vec_add1 (s, 0);
+
+  /* s now holds a sequence of NUL-terminated names */
+  for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1)
+    {
+      for (c = 0; c < n_flags; c++)
+        if (strcmp (names[c], (char *) p) == 0)
+          {
+            rv |= val[c];
+            break;
+          }
+
+      if (c == n_flags)
+        {
+          /* unknown name: fail instead of silently returning only the
+           * flags parsed so far */
+          rv = 0;
+          goto done;
+        }
+    }
+
+done:
+  vec_free (s);
+  return rv;
+}
+
+/* Unformat a comma-separated list of device flag names into a
+ * vnet_dev_flags_t (passed by pointer).
+ *
+ * Returns 1 and stores the bitmask in fp->n on success, 0 when the parsed
+ * bitmask is zero.
+ */
+uword
+unformat_vnet_dev_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  u64 val;
+
+  char *names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  /* entries must be comma-terminated, otherwise the initializer stops
+   * compiling as soon as foreach_vnet_dev_flag lists a second flag */
+  u64 vals[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  val = unformat_flags (input, names, vals, ARRAY_LEN (names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* Unformat a comma-separated list of port flag names into a
+ * vnet_dev_port_flags_t (passed by pointer).
+ *
+ * Returns 1 and stores the bitmask in fp->n on success, 0 when the parsed
+ * bitmask is zero.
+ */
+uword
+unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  u64 val;
+
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  /* entries must be comma-terminated, otherwise the initializer stops
+   * compiling as soon as foreach_vnet_dev_port_flag lists a second flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  val =
+    unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* Append the names of all flags set in val, separated by single spaces.
+ *
+ * flag_names / flag_values are parallel arrays with n_flags entries. Names
+ * are printed with upper case letters converted to lower case and '_'
+ * converted to '-'.
+ */
+static u8 *
+format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[],
+              u32 n_flags)
+{
+  u32 n_emitted = 0;
+
+  for (u32 i = 0; i < n_flags; i++)
+    {
+      if (val & flag_values[i])
+        {
+          /* separator goes in front of every name but the first one */
+          if (n_emitted++ > 0)
+            vec_add1 (s, ' ');
+
+          for (char *p = flag_names[i]; *p; p++)
+            {
+              char ch = *p;
+
+              if (ch >= 'A' && ch <= 'Z')
+                ch += 'a' - 'A';
+              else if (ch == '_')
+                ch = '-';
+
+              vec_add1 (s, ch);
+            }
+        }
+    }
+
+  return s;
+}
+
+/* Format the device flags set in a vnet_dev_flags_t (passed by pointer) as
+ * a space-separated list of names.
+ */
+u8 *
+format_vnet_dev_flags (u8 *s, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  /* entries must be comma-terminated, otherwise the initializer stops
+   * compiling as soon as foreach_vnet_dev_flag lists a second flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+                       ARRAY_LEN (flag_names));
+}
+
+/* Format the port flags set in a vnet_dev_port_flags_t (passed by pointer)
+ * as a space-separated list of names.
+ */
+u8 *
+format_vnet_dev_port_flags (u8 *s, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  /* entries must be comma-terminated, otherwise the initializer stops
+   * compiling as soon as foreach_vnet_dev_port_flag lists a second flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+                       ARRAY_LEN (flag_names));
+}
+
+/* Format the common prefix of a device log line.
+ *
+ * Arguments: vnet_dev_t * (may be null), char * function name (may be
+ * null). Output is "<device address> <function>: " with the absent parts
+ * left out; a leading "vnet_dev_" is stripped from the function name.
+ */
+u8 *
+format_vnet_dev_log (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  char *fn_name = va_arg (*args, char *);
+
+  if (dev)
+    s = format (s, "%U", format_vnet_dev_addr, dev);
+
+  if (fn_name)
+    {
+      /* separator is only needed when both parts are printed */
+      if (dev)
+        vec_add1 (s, ' ');
+
+      if (strncmp (fn_name, "vnet_dev_", 9) == 0)
+        fn_name += 9;
+
+      s = format (s, "%s", fn_name);
+    }
+
+  vec_add1 (s, ':');
+  vec_add1 (s, ' ');
+
+  return s;
+}
diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c
new file mode 100644 (file)
index 0000000..7e7347e
--- /dev/null
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/flow/flow.h>
+
+/* File-local log class "dev/handler"; referenced as dev_log.class by the
+ * log_* macros used in this file. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "handler",
+};
+
+/* Change the max frame size of the port backing a hw interface.
+ *
+ * The request is validated first and then handed over for processing.
+ * Returns 0 on success or when validation reports that nothing changes,
+ * otherwise the error produced by vnet_dev_port_err ().
+ */
+clib_error_t *
+vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+                                  u32 frame_size)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_MAX_FRAME_SIZE,
+    .max_frame_size = frame_size,
+  };
+  vnet_dev_rv_t rv;
+
+  log_debug (port->dev, "size %u", frame_size);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req);
+
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, port, rv,
+                              "new max frame size is not valid for port");
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, port, &req);
+
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return vnet_dev_port_err (vm, port, rv,
+                            "device failed to change max frame size");
+}
+
+/* Ethernet flag change callback: switches promiscuous mode of the port
+ * backing a hw interface on (ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) or off
+ * (ETHERNET_INTERFACE_FLAG_DEFAULT_L3).
+ *
+ * Returns 0 on success or when nothing changes, ~0 for any other flags
+ * value and on validation or processing failure.
+ */
+u32
+vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+                               u32 flags)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+  };
+  vnet_dev_rv_t rv;
+
+  if (flags == ETHERNET_INTERFACE_FLAG_DEFAULT_L3)
+    {
+      log_debug (port->dev, "promisc off");
+    }
+  else if (flags == ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
+    {
+      log_debug (port->dev, "promisc on");
+      req.promisc = 1;
+    }
+  else
+    {
+      return ~0;
+    }
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req);
+
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return ~0;
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, port, &req);
+
+  return (rv == VNET_DEV_OK || rv == VNET_DEV_ERR_NO_CHANGE) ? 0 : ~0;
+}
+
+/* Change the primary hardware address of the port backing a hw interface.
+ *
+ * The request is validated first and then handed over for processing.
+ * Returns 0 on success or when validation reports that nothing changes,
+ * otherwise the error produced by vnet_dev_port_err ().
+ */
+clib_error_t *
+vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old,
+                          const u8 *new)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR,
+  };
+  vnet_dev_rv_t rv;
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, new);
+
+  log_debug (port->dev, "new mac  %U", format_vnet_dev_hw_addr, &req.addr);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req);
+
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, port, rv,
+                              "hw address is not valid for port");
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, port, &req);
+
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return vnet_dev_port_err (vm, port, rv,
+                            "device failed to change hw address");
+}
+
+/* Add (is_add != 0) or remove (is_add == 0) a secondary hardware address
+ * on the port backing a hw interface.
+ *
+ * The request is validated first and then handed over for processing.
+ * Returns 0 on success, otherwise the error produced by
+ * vnet_dev_port_err ().
+ */
+clib_error_t *
+vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address,
+                              u8 is_add)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = is_add ? VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR :
+                     VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR,
+  };
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, address);
+
+  /* closing parenthesis was missing from the message */
+  log_debug (p->dev, "received (addr %U is_add %u)", format_vnet_dev_hw_addr,
+             &req.addr, is_add);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+                              "provided secondary hw addresses cannot "
+                              "be added/removed");
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    return vnet_dev_port_err (
+      vm, p, rv, "device failed to add/remove secondary hw address");
+
+  return 0;
+}
+
+/* Flow ops callback. Flow operations are not implemented: every request is
+ * logged as a warning and rejected with VNET_FLOW_ERROR_NOT_SUPPORTED. */
+int
+vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op,
+                     u32 dev_instance, u32 flow_index, uword *private_data)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+  log_warn (p->dev, "unsupported request for flow_ops received");
+  return VNET_FLOW_ERROR_NOT_SUPPORTED;
+}
+
+/* RSS queue configuration callback. Not implemented: every request is
+ * logged as a warning and rejected with VNET_ERR_UNSUPPORTED. */
+clib_error_t *
+vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+                                   clib_bitmap_t *bitmap)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+
+  /* message used to talk about flow_ops (copied from the flow ops
+   * handler), which pointed at the wrong request when reading logs */
+  log_warn (p->dev, "unsupported request for set_rss_queues received");
+  return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented");
+}
+
+/* Clear counters callback: requests vnet_dev_port_clear_counters to be
+ * called for the port selected by the given dev_instance, without waiting
+ * for a return value. */
+void
+vnet_dev_clear_hw_interface_counters (u32 instance)
+{
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance);
+  vlib_main_t *vm = vlib_get_main ();
+
+  vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters);
+}
+
+/* RX mode change callback. Not implemented: always returns a
+ * "not supported" error regardless of the arguments. */
+clib_error_t *
+vnet_dev_rx_mode_change_fn (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
+                           vnet_hw_if_rx_mode mode)
+{
+  return clib_error_return (0, "not supported");
+}
+
+/* Configure (or, with node_index == ~0, remove) redirection of received
+ * packets to a given graph node.
+ *
+ * The redirect state (redirect_to_node, redirect_to_node_next_index) is
+ * always recorded. The active rx_next_index is only replaced while
+ * feature_arc is 0; in that case all RX queues of the port are asked to
+ * pick up the new next index.
+ */
+void
+vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
+                                  u32 node_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  int runtime_update = 0;
+
+  if (node_index == ~0)
+    {
+      port->intf.redirect_to_node_next_index = 0;
+      if (port->intf.feature_arc == 0)
+        {
+          port->intf.rx_next_index =
+            vnet_dev_default_next_index_by_port_type[port->attr.type];
+          runtime_update = 1;
+        }
+      port->intf.redirect_to_node = 0;
+    }
+  else
+    {
+      u16 next_index =
+        vlib_node_add_next (vm, port_rx_eth_node.index, node_index);
+      port->intf.redirect_to_node_next_index = next_index;
+      if (port->intf.feature_arc == 0)
+        {
+          port->intf.rx_next_index = next_index;
+          runtime_update = 1;
+        }
+      port->intf.redirect_to_node = 1;
+    }
+
+  /* rx_next_index must not be touched past this point: it holds a next
+   * index (not a graph node index) and has to be left alone while
+   * feature_arc is set. An unconditional assignment of node_index here
+   * used to clobber the value selected above. */
+
+  if (runtime_update)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+        vnet_dev_rx_queue_rt_request (
+          vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+                            "redirect-to-next configuration");
+    }
+}
diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h
new file mode 100644 (file)
index 0000000..432e7b8
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_LOG_H_
+#define _VNET_DEV_LOG_H_
+
+format_function_t format_vnet_dev_log;
+
+/* Logging helpers. They expect a log class named dev_log to be visible at
+ * the point of use and prefix every message with format_vnet_dev_log
+ * output for the given device. Only log_debug passes the name of the
+ * calling function (__func__); the other levels pass 0 instead. */
+#define log_debug(dev, f, ...)                                                \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \
+           dev, __func__, ##__VA_ARGS__)
+#define log_notice(dev, f, ...)                                               \
+  vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f,                     \
+           format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f,                    \
+           format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log,   \
+           dev, 0, ##__VA_ARGS__)
+
+#endif /* _VNET_DEV_LOG_H_ */
diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h
new file mode 100644 (file)
index 0000000..f13f407
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_MGMT_H_
+#define _VNET_DEV_MGMT_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_MGMT_H_ */
diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c
new file mode 100644 (file)
index 0000000..3310841
--- /dev/null
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev/pci"; referenced as dev_log.class by the log_*
+ * macros used in this file. Default syslog level is debug. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "pci",
+  .default_syslog_level = VLIB_LOG_LEVEL_DEBUG,
+};
+
+/* Parse a device id string of the form "pci<delimiter><pci address>" into
+ * addr. Returns the unformat () result: non-zero when the string matched.
+ */
+static int
+vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str)
+{
+  unformat_input_t in;
+  uword matched;
+
+  unformat_init_string (&in, str, strlen (str));
+  matched = unformat (&in, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U",
+                      unformat_vlib_pci_addr, addr);
+  unformat_free (&in);
+
+  return matched;
+}
+
+/* Bus op: look up PCI identification data for a device id string.
+ *
+ * Returns a newly allocated vnet_dev_bus_pci_device_info_t holding the PCI
+ * address, vendor id, device id and revision, or 0 when the device id
+ * cannot be parsed or the PCI layer reports an error. The result is
+ * allocated with clib_mem_alloc ().
+ */
+static void *
+vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id)
+{
+  vnet_dev_bus_pci_device_info_t *info;
+  vlib_pci_device_info_t *pci_info = 0;
+  vlib_pci_addr_t pci_addr = {};
+  clib_error_t *error = 0;
+
+  vlib_log_debug (dev_log.class, "device %s", device_id);
+
+  if (!vnet_dev_bus_pci_device_id_to_pci_addr (&pci_addr, device_id))
+    return 0;
+
+  pci_info = vlib_pci_get_device_info (vm, &pci_addr, &error);
+  if (error)
+    {
+      vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error,
+                    error);
+      clib_error_free (error);
+      return 0;
+    }
+
+  info = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t));
+  info->addr = pci_addr;
+  info->vendor_id = pci_info->vendor_id;
+  info->device_id = pci_info->device_id;
+  info->revision = pci_info->revision;
+
+  /* everything needed has been copied out */
+  vlib_pci_free_device_info (pci_info);
+
+  return info;
+}
+
+/* Bus op: release a device info object with clib_mem_free (). */
+static void
+vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info)
+{
+  clib_mem_free (dev_info);
+}
+
+/* Bus op: open the PCI device named by dev->device_id.
+ *
+ * On success the PCI handle is stored in the bus data and marked valid,
+ * the device numa node and VA DMA capability are recorded, the device
+ * pointer is attached to the PCI handle as private data and a zeroed
+ * table with one slot per MSI-X interrupt is allocated.
+ *
+ * Returns VNET_DEV_OK, VNET_DEV_ERR_INVALID_DEVICE_ID when the device id
+ * cannot be parsed or VNET_DEV_ERR_BUS when the PCI layer fails to open
+ * the device.
+ */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *err = 0;
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0)
+    return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+  if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle)))
+    {
+      log_err (dev, "device_open: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  /* the close op only closes handles marked valid; without this the
+   * device would never be closed */
+  pdd->pci_handle_valid = 1;
+
+  dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle);
+
+  if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle))
+    {
+      dev->va_dma = 1;
+      log_debug (dev, "device supports VA DMA");
+    }
+
+  /* lets interrupt trampolines find the device from the PCI handle */
+  vlib_pci_set_private_data (vm, pdd->handle, (uword) dev);
+
+  pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle);
+  if (pdd->n_msix_int)
+    {
+      u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int;
+      sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+      pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+      clib_memset (pdd->msix_handlers, 0, sz);
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Bus op: undo vnet_dev_bus_pci_open. Removes the INTx handler and all
+ * MSI-X handlers which are still installed, frees the MSI-X handler table
+ * and closes the PCI handle when it is marked valid.
+ */
+static void
+vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (pdd->intx_handler)
+    {
+      vnet_dev_pci_intx_remove_handler (vm, dev);
+    }
+
+  if (pdd->msix_handlers)
+    {
+      u16 line = 0;
+
+      /* remove remaining handlers one interrupt line at a time */
+      while (line < pdd->n_msix_int)
+        {
+          if (pdd->msix_handlers[line])
+            vnet_dev_pci_msix_remove_handler (vm, dev, line, 1);
+          line++;
+        }
+
+      clib_mem_free (pdd->msix_handlers);
+      pdd->msix_handlers = 0;
+    }
+
+  if (pdd->pci_handle_valid)
+    {
+      vlib_pci_device_close (vm, pdd->handle);
+    }
+}
+
+/* Bus op: allocate zeroed DMA memory for a device.
+ *
+ * Memory is taken from physmem on the numa node of the device and mapped
+ * for DMA through the PCI handle of the device. align == 0 selects cache
+ * line alignment; size is rounded up to a multiple of the alignment.
+ *
+ * On success the address is stored in pp[0] and VNET_DEV_OK is returned.
+ * On failure VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL is returned, pp is left
+ * untouched and nothing stays allocated.
+ */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size,
+                                u32 align, void **pp)
+{
+  clib_error_t *err;
+  void *p;
+
+  align = align ? align : CLIB_CACHE_LINE_BYTES;
+  size = round_pow2 (size, align);
+
+  p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node);
+
+  if (p == 0)
+    {
+      err = vlib_physmem_last_error (vm);
+      log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U",
+               format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p)))
+    {
+      log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error,
+               err);
+      clib_error_free (err);
+      /* caller never sees p on this path, so release it here */
+      vlib_physmem_free (vm, p);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  clib_memset (p, 0, size);
+  pp[0] = p;
+  return VNET_DEV_OK;
+}
+
+/* Bus op: return DMA memory to physmem. A null pointer is ignored. */
+static void
+vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  if (p)
+    vlib_physmem_free (vm, p);
+}
+
+/* Read the PCI config header of a device (sizeof (*hdr) bytes starting at
+ * config space offset 0) into hdr.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev,
+                                 vlib_pci_config_hdr_t *hdr)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err =
+    vlib_pci_read_write_config (vm, h, VLIB_READ, 0, hdr, sizeof (*hdr));
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_read_config_header: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Map a PCI region of a device; the PCI layer stores the result through pp.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region,
+                         void **pp)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_map_region (vm, h, region, pp);
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_map_region: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Request a PCI function level reset of a device.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_function_level_reset (vm, h);
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_function_level_reset: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Enable PCI bus mastering for a device.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_bus_master_enable (vm, h);
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_bus_master_enable: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* INTx trampoline registered with the PCI layer: finds the device through
+ * the private data of the PCI handle and calls its INTx handler, if any.
+ */
+static void
+vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (pdd->intx_handler == 0)
+    return;
+
+  pdd->intx_handler (vm, dev);
+}
+
+/* Install fn as the INTx interrupt handler of a device.
+ *
+ * The PCI layer calls the vnet_dev_pci_intx_handler trampoline, which in
+ * turn calls the handler recorded in the bus data of the device.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+                               vnet_dev_pci_intx_handler_fn_t *fn)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler);
+
+  if (err)
+    {
+      log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  /* fn was never recorded before, so the trampoline had nothing to call
+   * and the close op never unregistered the handler */
+  pdd->intx_handler = fn;
+
+  return VNET_DEV_OK;
+}
+
+/* Unregister the INTx handler of a device and forget the recorded handler.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error (the
+ * recorded handler is kept in that case).
+ */
+vnet_dev_rv_t
+vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_unregister_intx_handler (vm, h);
+
+  if (err == 0)
+    {
+      pdd->intx_handler = 0;
+      return VNET_DEV_OK;
+    }
+
+  log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* MSI-X trampoline registered with the PCI layer: finds the device through
+ * the private data of the PCI handle and calls the handler installed for
+ * the given interrupt line, if any.
+ */
+static void
+vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  /* msix_handlers is a plain clib_mem_alloc_aligned () buffer with
+   * n_msix_int slots, not a vector, so vec_len () must not be used on it */
+  if (line < pdd->n_msix_int && pdd->msix_handlers[line])
+    pdd->msix_handlers[line](vm, dev, line);
+}
+
+/* Install fn as the handler for MSI-X lines [first, first + count).
+ *
+ * The PCI layer calls the vnet_dev_pci_msix_handler trampoline; fn is
+ * recorded per line in the MSI-X handler table of the device. Lines are
+ * expected to have no handler installed yet (checked with ASSERT).
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+                               vnet_dev_pci_msix_handler_fn_t *fn, u16 first,
+                               u16 count)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_register_msix_handler (
+    vm, h, first, count, vnet_dev_pci_msix_handler);
+
+  if (err != 0)
+    {
+      log_err (dev, "pci_register_msix_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 line = first; line < first + count; line++)
+    {
+      ASSERT (pdd->msix_handlers[line] == 0);
+      pdd->msix_handlers[line] = fn;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Unregister the handlers of MSI-X lines [first, first + count) and clear
+ * their slots in the MSI-X handler table of the device. Lines are expected
+ * to have a handler installed (checked with ASSERT).
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error (the
+ * table is left untouched in that case).
+ */
+vnet_dev_rv_t
+vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                                  u16 count)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_unregister_msix_handler (vm, h, first, count);
+
+  if (err != 0)
+    {
+      log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 line = first; line < first + count; line++)
+    {
+      ASSERT (pdd->msix_handlers[line] != 0);
+      pdd->msix_handlers[line] = 0;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Enable count MSI-X interrupts of a device, starting at line first.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                          u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_enable_msix_irq (vm, h, first, count);
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Disable count MSI-X interrupts of a device, starting at line first.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                           u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_disable_msix_irq (vm, h, first, count);
+
+  if (err)
+    {
+      /* log tag was misspelled "pci_disble_msix_irq", which made the
+       * message impossible to find by the name of the failing call */
+      log_err (dev, "pci_disable_msix_irq: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Disable PCI bus mastering for a device.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging the PCI error.
+ */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err = vlib_pci_bus_master_disable (vm, h);
+
+  if (err == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_bus_master_disable: %U", format_clib_error, err);
+  clib_error_free (err);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Bus op: format PCI specific device info.
+ *
+ * Arguments: vnet_dev_format_args_t * (unused), vnet_dev_t *. Prints the
+ * PCI address and, when the config space can be read, the link port and
+ * link speed details. A config space read error is silently dropped.
+ */
+static u8 *
+format_dev_pci_device_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *fa =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_pci_config_t pci_cfg = {};
+  clib_error_t *err;
+
+  s = format (s, "PCIe address is %U", format_vlib_pci_addr, &pdd->addr);
+
+  err = vlib_pci_read_write_config (vm, pdd->handle, VLIB_READ, 0, &pci_cfg,
+                                    sizeof (pci_cfg));
+  if (err)
+    {
+      clib_error_free (err);
+      return s;
+    }
+
+  return format (s, ", port is %U, speed is %U (max %U)",
+                 format_vlib_pci_link_port, &pci_cfg,
+                 format_vlib_pci_link_speed, &pci_cfg,
+                 format_vlib_pci_link_speed_cap, &pci_cfg);
+}
+
+/* Bus op: format the PCI address of a device (vnet_dev_t * argument). */
+static u8 *
+format_dev_pci_device_addr (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vlib_pci_addr_t *pci_addr = &vnet_dev_get_bus_pci_device_data (dev)->addr;
+
+  s = format (s, "%U", format_vlib_pci_addr, pci_addr);
+  return s;
+}
+
+/* Registration of the "pci" bus and its ops.
+ *
+ * device_data_size is the size of the per-device bus data, which all code
+ * in this file accesses as vnet_dev_bus_pci_device_data_t through
+ * vnet_dev_get_bus_pci_device_data (). It used to be the size of the
+ * smaller vnet_dev_bus_pci_device_info_t, which is what get_device_info
+ * returns and is not stored in the bus data.
+ * NOTE(review): how device_data_size is consumed is not visible here -
+ * confirm that the larger size still fits the bus data area of vnet_dev_t.
+ */
+VNET_DEV_REGISTER_BUS (pci) = {
+  .name = "pci",
+  .device_data_size = sizeof (vnet_dev_bus_pci_device_data_t),
+  .ops = {
+    .device_open = vnet_dev_bus_pci_open,
+    .device_close = vnet_dev_bus_pci_close,
+    .get_device_info = vnet_dev_bus_pci_get_device_info,
+    .free_device_info = vnet_dev_bus_pci_free_device_info,
+    .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc,
+    .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free,
+    .format_device_info = format_dev_pci_device_info,
+    .format_device_addr = format_dev_pci_device_addr,
+  },
+};
diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h
new file mode 100644 (file)
index 0000000..dd104ea
--- /dev/null
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PCI_H_
+#define _VNET_DEV_PCI_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/dev/dev.h>
+
+/* INTx interrupt handler type; handlers of this type are passed to
+ * vnet_dev_pci_intx_add_handler (). */
+typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm,
+                                              vnet_dev_t *dev);
+/* MSI-X interrupt handler type; in addition to the device it receives the
+ * interrupt line. Handlers of this type are passed to
+ * vnet_dev_pci_msix_add_handler (). */
+typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm,
+                                              vnet_dev_t *dev, u16 line);
+
+/* PCI identification of a device: address, vendor/device id and revision.
+ * NOTE(review): presumably the object produced by the bus get_device_info
+ * op - confirm against pci.c. */
+typedef struct
+{
+  vlib_pci_addr_t addr;
+  u16 vendor_id;
+  u16 device_id;
+  u8 revision;
+} vnet_dev_bus_pci_device_info_t;
+
+/* Per-device PCI bus state. vnet_dev_get_bus_pci_device_data () returns
+ * dev->bus_data interpreted as this type. */
+typedef struct
+{
+  /* NOTE(review): presumably set while 'handle' refers to an open device -
+   * confirm against the open/close ops */
+  u8 pci_handle_valid : 1;
+  /* value returned by vnet_dev_get_pci_n_msix_interrupts () */
+  u16 n_msix_int;
+  vlib_pci_addr_t addr;
+  vlib_pci_dev_handle_t handle;
+  vnet_dev_pci_intx_handler_fn_t *intx_handler;
+  /* NOTE(review): presumably indexed by MSI-X line - confirm */
+  vnet_dev_pci_msix_handler_fn_t **msix_handlers;
+} vnet_dev_bus_pci_device_data_t;
+
+/* Return the device's bus_data area as PCI bus device data. */
+static_always_inline vnet_dev_bus_pci_device_data_t *
+vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev)
+{
+  return (void *) dev->bus_data;
+}
+
+/* Return the vlib PCI device handle stored in the device's bus data. */
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_handle (vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+
+  return bus_data->handle;
+}
+
+/* Return (by value) the PCI address stored in the device's bus data. */
+static_always_inline vlib_pci_addr_t
+vnet_dev_get_pci_addr (vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+
+  return bus_data->addr;
+}
+
+/* Return the n_msix_int value stored in the device's bus data.
+ * The return type matches the u16 field; it was previously declared as
+ * vlib_pci_dev_handle_t, which is a handle type, not a count. */
+static_always_inline u16
+vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev)
+{
+  return vnet_dev_get_bus_pci_device_data (dev)->n_msix_int;
+}
+
+/* PCI bus helper functions; each returns a vnet_dev_rv_t status. */
+
+/* read the PCI config header into the supplied vlib_pci_config_hdr_t */
+vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *,
+                                              vlib_pci_config_hdr_t *);
+
+/* NOTE(review): the u8 is presumably the region (BAR) number and the void **
+ * receives the mapped address - confirm against pci.c */
+vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8,
+                                      void **);
+vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *,
+                                            vnet_dev_pci_intx_handler_fn_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *);
+/* NOTE(review): in the MSI-X functions the two unnamed u16 arguments are
+ * presumably the first line and the number of lines - confirm against pci.c */
+vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *,
+                                            vnet_dev_pci_msix_handler_fn_t *,
+                                            u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *,
+                                               u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16,
+                                        u16);
+
+#endif /* _VNET_DEV_PCI_H_ */
diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c
new file mode 100644 (file)
index 0000000..f9d6c01
--- /dev/null
@@ -0,0 +1,678 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev" / "port"; presumably what the log_* macros from
+ * vnet/dev/log.h reference - confirm there. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "port",
+};
+
+/* Placeholder node function for the "port-rx-eth" node, which is registered
+ * in disabled state; asserts if it is ever called and returns 0. */
+static uword
+dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+               vlib_frame_t *frame)
+{
+  ASSERT (0);
+  return 0;
+}
+
+/* "port-rx-eth" input node, registered disabled with a placeholder function.
+ * Per-port rx nodes created in vnet_dev_port_if_create () are registered as
+ * siblings of this node. Next nodes come from foreach_vnet_dev_port_rx_next. */
+VLIB_REGISTER_NODE (port_rx_eth_node) = {
+  .function = dummy_input_fn,
+  .name = "port-rx-eth",
+  .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_DISABLED,
+  .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
+  .next_nodes = {
+#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
+  foreach_vnet_dev_port_rx_next
+#undef _
+  },
+};
+
+/* Default rx next index per port type; vnet_dev_port_if_create () uses it to
+ * initialize port->intf.rx_next_index. */
+u16 vnet_dev_default_next_index_by_port_type[] = {
+  [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
+};
+
+/* Feature arc "port-rx-eth": starts at the "port-rx-eth" node and ends at
+ * "ethernet-input". The arc index is stored in
+ * vnet_dev_main.eth_port_rx_feature_arc_index. */
+VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
+  .arc_name = "port-rx-eth",
+  .start_nodes = VNET_FEATURES ("port-rx-eth"),
+  .last_in_arc = "ethernet-input",
+  .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
+};
+
+/* Features on the "port-rx-eth" arc; every one except "ethernet-input" is
+ * declared to run before "ethernet-input". */
+VNET_FEATURE_INIT (l2_patch, static) = {
+  .arc_name = "port-rx-eth",
+  .node_name = "l2-patch",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (worker_handoff, static) = {
+  .arc_name = "port-rx-eth",
+  .node_name = "worker-handoff",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (span_input, static) = {
+  .arc_name = "port-rx-eth",
+  .node_name = "span-input",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
+  .arc_name = "port-rx-eth",
+  .node_name = "p2p-ethernet-input",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (ethernet_input, static) = {
+  .arc_name = "port-rx-eth",
+  .node_name = "ethernet-input",
+  .runs_before = 0, /* not before any other features */
+};
+
+/* Release a port: run the driver's optional free op, drop the port's pools,
+ * return its slot in the owning device's port pool and free the port memory.
+ * The port is expected to be stopped (asserted). */
+void
+vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+  ASSERT (port->started == 0);
+
+  log_debug (port->dev, "port %u", port->port_id);
+
+  /* driver hook goes first, while the port is still fully populated */
+  if (port->port_ops.free)
+    port->port_ops.free (vm, port);
+
+  pool_free (port->tx_queues);
+  pool_free (port->rx_queues);
+  pool_free (port->secondary_hw_addr);
+
+  pool_put_index (port->dev->ports, port->index);
+  clib_mem_free (port);
+}
+
+/* For every tx queue of the port and every thread set in the queue's
+ * assigned_threads bitmap, store the port's hw_if_index and the queue
+ * pointer in that thread's runtime data of the port's tx node. */
+void
+vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      u32 thread_index;
+      clib_bitmap_foreach (thread_index, txq->assigned_threads)
+       {
+         vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (
+           vlib_node_get_runtime (vlib_get_main_by_index (thread_index),
+                                  port->intf.tx_node_index));
+
+         rt->hw_if_index = port->intf.hw_if_index;
+         rt->tx_queue = txq;
+       }
+    }
+}
+
+/* Stop a port.
+ * Builds and executes STOP runtime ops for every rx queue currently marked
+ * started, calls the driver's port stop op (called without a NULL check),
+ * then clears the started flag on all rx and tx queues and on the port. */
+void
+vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rt_op_t *ops = 0;
+
+  log_debug (dev, "stopping port %u", port->port_id);
+
+  /* collect one STOP op per started rx queue, targeted at its rx thread */
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->started)
+      {
+       vnet_dev_rt_op_t op = {
+         .type = VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+         .action = VNET_DEV_RT_OP_ACTION_STOP,
+         .thread_index = q->rx_thread_index,
+         .rx_queue = q,
+       };
+       vec_add1 (ops, op);
+      }
+
+  vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+  vec_free (ops);
+
+  /* driver stop runs after the runtime ops have been executed */
+  port->port_ops.stop (vm, port);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      q->started = 0;
+      log_debug (dev, "port %u rx queue %u stopped", port->port_id,
+                q->queue_id);
+    }
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      q->started = 0;
+      log_debug (dev, "port %u tx queue %u stopped", port->port_id,
+                q->queue_id);
+    }
+
+  log_debug (dev, "port %u stopped", port->port_id);
+  port->started = 0;
+}
+
+/* Call vnet_dev_rx_queue_start () on each rx queue of the port.
+ * Returns the first non-OK status (remaining queues are not attempted) or
+ * VNET_DEV_OK when all calls succeed or the port has no rx queues. */
+vnet_dev_rv_t
+vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      vnet_dev_rv_t status = vnet_dev_rx_queue_start (vm, rxq);
+
+      if (status != VNET_DEV_OK)
+       return status;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Call vnet_dev_tx_queue_start () on each tx queue of the port.
+ * Returns the first non-OK status (remaining queues are not attempted) or
+ * VNET_DEV_OK when all calls succeed or the port has no tx queues. */
+vnet_dev_rv_t
+vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      vnet_dev_rv_t status = vnet_dev_tx_queue_start (vm, txq);
+
+      if (status != VNET_DEV_OK)
+       return status;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Start a port.
+ * Refreshes the tx node runtime data, calls the driver's port start op
+ * (called without a NULL check) and, on success, executes START runtime ops
+ * for every enabled rx queue and marks enabled rx/tx queues and the port as
+ * started. If the driver start op fails, the port is stopped again via
+ * vnet_dev_port_stop () and the driver's status is returned. */
+vnet_dev_rv_t
+vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rt_op_t *ops = 0;
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (dev, "starting port %u", port->port_id);
+
+  vnet_dev_port_update_tx_node_runtime (vm, port);
+
+  if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK)
+    {
+      vnet_dev_port_stop (vm, port);
+      return rv;
+    }
+
+  /* collect one START op per enabled rx queue, targeted at its rx thread */
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->enabled)
+      {
+       vnet_dev_rt_op_t op = {
+         .type = VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+         .action = VNET_DEV_RT_OP_ACTION_START,
+         .thread_index = q->rx_thread_index,
+         .rx_queue = q,
+       };
+       vec_add1 (ops, op);
+      }
+
+  vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+  vec_free (ops);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->enabled)
+      {
+       log_debug (dev, "port %u rx queue %u started", port->port_id,
+                  q->queue_id);
+       q->started = 1;
+      }
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    if (q->enabled)
+      {
+       log_debug (dev, "port %u tx queue %u started", port->port_id,
+                  q->queue_id);
+       q->started = 1;
+      }
+
+  port->started = 1;
+  log_debug (dev, "port %u started", port->port_id);
+
+  return VNET_DEV_OK;
+}
+
+/* Allocate a new port for 'dev' and populate it from 'args'.
+ * The port gets args->port.data_size bytes of private data initialized from
+ * args->port.initial_data, is linked into dev->ports, and copies attributes,
+ * queue configs, ops and rx/tx node descriptions from 'args'. The driver's
+ * optional alloc op is then called; port->initialized is set only when the
+ * result is VNET_DEV_OK.
+ * Returns the alloc op status, or VNET_DEV_OK when there is no alloc op.
+ * NOTE(review): initial_data is copied unconditionally, and on alloc failure
+ * the port is left in dev->ports - confirm callers handle both. */
+vnet_dev_rv_t
+vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
+                  vnet_dev_port_add_args_t *args)
+{
+  vnet_dev_port_t **pp, *port;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
+  ASSERT (args->port.attr.max_supported_frame_size);
+
+  port =
+    vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);
+  pool_get (dev->ports, pp);
+  pp[0] = port;
+  clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
+              args->port.data_size);
+  port->port_id = id;
+  /* index of the port's slot within dev->ports */
+  port->index = pp - dev->ports;
+  port->dev = dev;
+  port->attr = args->port.attr;
+  port->rx_queue_config = args->rx_queue.config;
+  port->tx_queue_config = args->tx_queue.config;
+  port->rx_queue_ops = args->rx_queue.ops;
+  port->tx_queue_ops = args->tx_queue.ops;
+  port->port_ops = args->port.ops;
+  port->rx_node = *args->rx_node;
+  port->tx_node = *args->tx_node;
+
+  /* defaults out of port attributes */
+  port->max_frame_size = args->port.attr.max_supported_frame_size;
+  port->primary_hw_addr = args->port.attr.hw_addr;
+
+  if (port->port_ops.alloc)
+    rv = port->port_ops.alloc (vm, port);
+
+  if (rv == VNET_DEV_OK)
+    port->initialized = 1;
+
+  return rv;
+}
+
+/* Validate a port config change request against the current port state.
+ * Returns VNET_DEV_OK immediately if the request is already marked
+ * validated. Otherwise performs the generic per-type checks below, then the
+ * driver's optional config_change_validate op; on success marks the request
+ * validated.
+ * Returns: VNET_DEV_OK, VNET_DEV_ERR_INVALID_VALUE, VNET_DEV_ERR_NO_CHANGE,
+ * VNET_DEV_ERR_ALREADY_EXISTS, VNET_DEV_ERR_NO_SUCH_ENTRY, or whatever the
+ * driver's validate op returns. */
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+                                      vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv;
+  vnet_dev_hw_addr_t *addr;
+  int found;
+
+  if (req->validated)
+    return VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      /* must fit the maximum the port supports and differ from current */
+      if (req->max_frame_size > port->attr.max_supported_frame_size)
+       return VNET_DEV_ERR_INVALID_VALUE;
+      if (req->max_frame_size == port->max_frame_size)
+       return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      if (req->promisc == port->promisc)
+       return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      if (clib_memcmp (&req->addr, &port->primary_hw_addr,
+                      sizeof (vnet_dev_hw_addr_t)) == 0)
+       return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      /* reject an address that is already in the secondary address pool */
+      pool_foreach (addr, port->secondary_hw_addr)
+       if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+         return VNET_DEV_ERR_ALREADY_EXISTS;
+      break;
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      /* the address to remove must be present in the pool */
+      found = 0;
+      pool_foreach (addr, port->secondary_hw_addr)
+       if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+         found = 1;
+      if (!found)
+       return VNET_DEV_ERR_NO_SUCH_ENTRY;
+      break;
+
+    default:
+      break;
+    }
+
+  /* give the driver a chance to veto the request */
+  if (port->port_ops.config_change_validate)
+    {
+      rv = port->port_ops.config_change_validate (vm, port, req);
+      if (rv != VNET_DEV_OK)
+       return rv;
+    }
+
+  req->validated = 1;
+  return VNET_DEV_OK;
+}
+
+/* Apply a port configuration change.
+ * The request is validated first (a request already marked validated passes
+ * immediately); a validation failure is returned to the caller and nothing
+ * is changed. The driver's optional config_change op is called next and, if
+ * it succeeds, the change is recorded in the port structure.
+ * Returns VNET_DEV_OK on success, otherwise the validation or driver status. */
+vnet_dev_rv_t
+vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                         vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv;
+  vnet_dev_hw_addr_t *a;
+
+  vnet_dev_port_validate (vm, port);
+
+  /* do not apply a request that failed validation */
+  rv = vnet_dev_port_cfg_change_req_validate (vm, port, req);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  if (port->port_ops.config_change)
+    {
+      rv = port->port_ops.config_change (vm, port, req);
+      if (rv != VNET_DEV_OK)
+       return rv;
+    }
+
+  /* driver accepted the change (or has no hook): update the port state */
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      port->max_frame_size = req->max_frame_size;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      port->promisc = req->promisc;
+      break;
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      clib_memcpy (&port->primary_hw_addr, &req->addr,
+                  sizeof (vnet_dev_hw_addr_t));
+      break;
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      pool_get (port->secondary_hw_addr, a);
+      clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
+      break;
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      /* drop the first matching entry; the break leaves the pool walk */
+      pool_foreach (a, port->secondary_hw_addr)
+       if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
+         {
+           pool_put (port->secondary_hw_addr, a);
+           break;
+         }
+      break;
+
+    default:
+      break;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Record link speed and/or link state changes reported for a port.
+ * Each change flagged in changes.change is stored in the port and, when the
+ * interface has already been created, pushed to the vnet hw interface. */
+void
+vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_port_state_changes_t changes)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  vnet_dev_port_validate (vm, port);
+
+  if (changes.change.link_speed)
+    {
+      port->speed = changes.link_speed;
+
+      if (port->interface_created)
+       {
+         vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+                                           changes.link_speed);
+       }
+
+      log_debug (port->dev, "port speed changed to %u", changes.link_speed);
+    }
+
+  if (changes.change.link_state)
+    {
+      port->link_up = changes.link_state;
+
+      if (port->interface_created)
+       {
+         if (changes.link_state)
+           vnet_hw_interface_set_flags (vnm, port->intf.hw_if_index,
+                                        VNET_HW_INTERFACE_FLAG_LINK_UP);
+         else
+           vnet_hw_interface_set_flags (vnm, port->intf.hw_if_index, 0);
+       }
+
+      log_debug (port->dev, "port link state changed to %s",
+                changes.link_state ? "up" : "down");
+    }
+}
+
+/* Allocate the port's counters from the given counter descriptions and store
+ * the result in port->counter_main. The name passed to the allocator is
+ * "<device_id> port <port_id> counters". */
+void
+vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_counter_t *counters, u16 n_counters)
+{
+  vnet_dev_port_validate (vm, port);
+
+  port->counter_main =
+    vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
+                            port->dev->device_id, port->port_id);
+}
+
+/* Free the port's counters, if any were allocated. Safe to call repeatedly:
+ * the pointer is cleared after the free so later free/clear calls do not
+ * touch the released counters. */
+void
+vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  if (port->counter_main)
+    {
+      vnet_dev_counters_free (vm, port->counter_main);
+      port->counter_main = 0;
+    }
+}
+
+/* Create the vnet interface for a port.
+ * Steps: derive a default interface name if none is set, allocate rx/tx
+ * queues, assign tx queues to threads, reserve a dev_instance, and - for
+ * ethernet ports - register the ethernet interface and create or reuse the
+ * per-port rx node. Finally requests an rx next-index update for every rx
+ * queue, refreshes the tx node runtime and calls the driver's optional init
+ * op.
+ * Returns VNET_DEV_OK on success. VNET_DEV_ERR_BUG is returned directly when
+ * the generated name does not fit; any other failure tears the interface
+ * down again through vnet_dev_port_if_remove () before returning. */
+vnet_dev_rv_t
+vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u16 n_threads = vlib_get_n_threads ();
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_port_t **pp;
+  /* initialized: with zero queues and no init op nothing else assigns it */
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u16 ti = 0;
+
+  /* default name: <driver name><device index>/<port index> */
+  if (port->intf.name[0] == 0)
+    {
+      u8 *s;
+      s = format (0, "%s%u/%u",
+                 dm->drivers[port->dev->driver_index].registration->name,
+                 port->dev->index, port->index);
+      u32 n = vec_len (s);
+
+      if (n >= sizeof (port->intf.name))
+       {
+         vec_free (s);
+         return VNET_DEV_ERR_BUG;
+       }
+      clib_memcpy (port->intf.name, s, n);
+      port->intf.name[n] = 0;
+      vec_free (s);
+    }
+
+  log_debug (
+    dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
+    port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
+    port->intf.txq_sz);
+
+  for (int i = 0; i < port->intf.num_rx_queues; i++)
+    if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
+       VNET_DEV_OK)
+      goto error;
+
+  for (u32 i = 0; i < port->intf.num_tx_queues; i++)
+    if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
+       VNET_DEV_OK)
+      goto error;
+
+  /* hand out tx queues to threads one by one, starting at thread 0, until
+   * either queues or threads run out */
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
+      log_debug (dev, "port %u tx queue %u assigned to thread %u",
+                port->port_id, q->queue_id, ti);
+      if (++ti >= n_threads)
+       break;
+    }
+
+  /* pool of port pointers helps us to assign unique dev_instance */
+  pool_get (dm->ports_by_dev_instance, pp);
+  port->intf.dev_instance = pp - dm->ports_by_dev_instance;
+  pp[0] = port;
+
+  if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+    {
+      vnet_device_class_t *dev_class;
+      vnet_dev_driver_t *driver;
+      vnet_sw_interface_t *sw;
+      vnet_hw_interface_t *hw;
+      u32 rx_node_index;
+
+      driver = pool_elt_at_index (dm->drivers, dev->driver_index);
+
+      /* hack to provide per-port tx node function */
+      dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
+      dev_class->tx_fn_registrations = port->tx_node.registrations;
+      dev_class->format_tx_trace = port->tx_node.format_trace;
+      dev_class->tx_function_error_counters = port->tx_node.error_counters;
+      dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+
+      /* create new interface including tx and output nodes */
+      port->intf.hw_if_index = vnet_eth_register_interface (
+       vnm, &(vnet_eth_interface_registration_t){
+              .address = port->primary_hw_addr.eth_mac,
+              .max_frame_size = port->max_frame_size,
+              .dev_class_index = driver->dev_class_index,
+              .dev_instance = port->intf.dev_instance,
+              .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
+              .cb.flag_change = vnet_dev_port_eth_flag_change,
+            });
+
+      sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
+      hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
+      port->intf.sw_if_index = sw->sw_if_index;
+      /* propagate link state / speed already known for the port */
+      vnet_hw_interface_set_flags (
+       vnm, port->intf.hw_if_index,
+       port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+      if (port->speed)
+       vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+                                         port->speed);
+
+      port->intf.tx_node_index = hw->tx_node_index;
+
+      /* create / reuse rx node */
+      if (vec_len (dm->free_rx_node_indices))
+       {
+         vlib_node_t *n;
+         rx_node_index = vec_pop (dm->free_rx_node_indices);
+         vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
+         n = vlib_get_node (vm, rx_node_index);
+         n->function = vlib_node_get_preferred_node_fn_variant (
+           vm, port->rx_node.registrations);
+         n->format_trace = port->rx_node.format_trace;
+         vlib_register_errors (vm, rx_node_index,
+                               port->rx_node.n_error_counters, 0,
+                               port->rx_node.error_counters);
+       }
+      else
+       {
+         /* repeats the device class tx assignments already made above */
+         dev_class->format_tx_trace = port->tx_node.format_trace;
+         dev_class->tx_function_error_counters = port->tx_node.error_counters;
+         dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+         vlib_node_registration_t rx_node_reg = {
+           .sibling_of = "port-rx-eth",
+           .type = VLIB_NODE_TYPE_INPUT,
+           .state = VLIB_NODE_STATE_DISABLED,
+           .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+           .node_fn_registrations = port->rx_node.registrations,
+           .format_trace = port->rx_node.format_trace,
+           .error_counters = port->rx_node.error_counters,
+           .n_errors = port->rx_node.n_error_counters,
+         };
+         rx_node_index =
+           vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
+       }
+      port->rx_node_assigned = 1;
+      port->intf.rx_node_index = rx_node_index;
+      port->intf.rx_next_index =
+       vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+      vlib_worker_thread_node_runtime_update ();
+      log_debug (dev,
+                "ethernet interface created, hw_if_index %u sw_if_index %u "
+                "rx_node_index %u tx_node_index %u",
+                port->intf.hw_if_index, port->intf.sw_if_index,
+                port->intf.rx_node_index, port->intf.tx_node_index);
+    }
+
+  port->interface_created = 1;
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
+       port->intf.sw_if_index;
+      /* poison to catch node not calling runtime update function */
+      q->next_index = ~0;
+      vnet_dev_rx_queue_rt_request (
+       vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+    }
+
+  vnet_dev_port_update_tx_node_runtime (vm, port);
+
+  if (port->port_ops.init)
+    rv = port->port_ops.init (vm, port);
+
+error:
+  /* reached on success too; only a failure triggers the teardown */
+  if (rv != VNET_DEV_OK)
+    vnet_dev_port_if_remove (vm, port);
+  return rv;
+}
+
+/* Remove the vnet interface of a port and release what
+ * vnet_dev_port_if_create () set up: stops the port if started, parks the rx
+ * node on the free list, deletes the hw interface, resets port->intf, calls
+ * the driver's optional deinit op and frees all queues and port counters.
+ * Each step is guarded by the corresponding port flag, so it also serves as
+ * the cleanup path for a partially created interface.
+ * Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  vnet_dev_port_validate (vm, port);
+
+  if (port->started)
+    vnet_dev_port_stop (vm, port);
+
+  if (port->rx_node_assigned)
+    {
+      /* rx node is renamed and kept for reuse by a later if_create */
+      vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
+                       port->intf.rx_node_index);
+      vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
+      port->rx_node_assigned = 0;
+    }
+
+  if (port->interface_created)
+    {
+      /* hw interface deletion is done under the worker thread barrier */
+      vlib_worker_thread_barrier_sync (vm);
+      vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
+      vlib_worker_thread_barrier_release (vm);
+      pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
+      port->interface_created = 0;
+    }
+
+  /* wipe all interface state (name, indices, queue counts/sizes) */
+  port->intf = (typeof (port->intf)){};
+
+  if (port->port_ops.deinit)
+    port->port_ops.deinit (vm, port);
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    vnet_dev_tx_queue_free (vm, q);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    vnet_dev_rx_queue_free (vm, q);
+
+  vnet_dev_port_free_counters (vm, port);
+
+  return VNET_DEV_OK;
+}
+
+/* Clear the port's counters and the counters of each of its rx and tx
+ * queues (where allocated), then log a notice. */
+void
+vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  if (port->counter_main)
+    vnet_dev_counters_clear (vm, port->counter_main);
+
+  foreach_vnet_dev_port_rx_queue (q, port)
+    if (q->counter_main)
+      vnet_dev_counters_clear (vm, q->counter_main);
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    if (q->counter_main)
+      vnet_dev_counters_clear (vm, q->counter_main);
+
+  log_notice (port->dev, "counters cleared on port %u", port->port_id);
+}
diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c
new file mode 100644 (file)
index 0000000..3c1f0b8
--- /dev/null
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev" / "process"; presumably what the log_* macros
+ * from vnet/dev/log.h reference - confirm there. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "process",
+};
+
+/* Events understood by the per-device process node
+ * (see vnet_dev_process_one_event ()). VNET_DEV_EVENT_CLOCK (~0) is what
+ * vnet_dev_process_event_send_and_wait () compares the received event
+ * against to report a timeout. */
+typedef enum
+{
+  VNET_DEV_EVENT_PERIODIC_STOP,
+  VNET_DEV_EVENT_PERIODIC_START,
+  VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+  VNET_DEV_EVENT_PROCESS_QUIT,
+  VNET_DEV_EVENT_CALL_OP,
+  VNET_DEV_EVENT_CALL_OP_NO_RV,
+  VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+  VNET_DEV_EVENT_CALL_PORT_OP,
+  VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+  VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+  VNET_DEV_EVENT_CLOCK = ~0
+} __clib_packed vnet_dev_event_t;
+
+/* Event record sent to a device's process node. 'event' selects which union
+ * member is meaningful. */
+typedef struct
+{
+  vnet_dev_event_t event;
+  /* when set, the process node signals the result back to
+   * calling_process_index after handling the event */
+  u8 reply_needed : 1;
+  u32 calling_process_index;
+  union
+  {
+    /* VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ */
+    struct
+    {
+      vnet_dev_port_t *port;
+      vnet_dev_port_cfg_change_req_t *change_req;
+    } port_cfg_change;
+    /* VNET_DEV_EVENT_CALL_OP */
+    struct
+    {
+      vnet_dev_op_t *op;
+    } call_op;
+    /* VNET_DEV_EVENT_CALL_OP_NO_RV */
+    struct
+    {
+      vnet_dev_op_no_rv_t *op;
+    } call_op_no_rv;
+    /* VNET_DEV_EVENT_CALL_OP_NO_WAIT */
+    struct
+    {
+      vnet_dev_op_no_rv_t *op;
+    } call_op_no_wait;
+    /* VNET_DEV_EVENT_CALL_PORT_OP */
+    struct
+    {
+      vnet_dev_port_op_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op;
+    /* VNET_DEV_EVENT_CALL_PORT_OP_NO_RV */
+    struct
+    {
+      vnet_dev_port_op_no_rv_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op_no_rv;
+    /* VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT */
+    struct
+    {
+      vnet_dev_port_op_no_rv_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op_no_wait;
+  };
+} vnet_dev_event_data_t;
+
+/* Handle a single event on behalf of device 'dev'.
+ * Returns the status of the invoked operation for events that produce one
+ * (port config change, CALL_OP, CALL_PORT_OP); VNET_DEV_OK for all others.
+ * Unknown event values trip an ASSERT. */
+static vnet_dev_rv_t
+vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev,
+                           vnet_dev_event_data_t *ed)
+{
+  vnet_dev_port_t *p;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  switch (ed->event)
+    {
+    case VNET_DEV_EVENT_CLOCK:
+      break;
+    case VNET_DEV_EVENT_PROCESS_QUIT:
+      /* makes the main loop of vnet_dev_process () terminate */
+      log_debug (dev, "quit requested");
+      dev->process_node_quit = 1;
+      break;
+    case VNET_DEV_EVENT_PERIODIC_START:
+      log_debug (dev, "periodic start");
+      dev->process_node_periodic = 1;
+      break;
+    case VNET_DEV_EVENT_PERIODIC_STOP:
+      log_debug (dev, "periodic stop");
+      dev->process_node_periodic = 0;
+      break;
+    case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ:
+      log_debug (dev, "port config change");
+      p = ed->port_cfg_change.port;
+      rv = vnet_dev_port_cfg_change (vm, p, ed->port_cfg_change.change_req);
+      break;
+    case VNET_DEV_EVENT_CALL_OP:
+      log_debug (dev, "call op");
+      rv = ed->call_op.op (vm, dev);
+      break;
+    case VNET_DEV_EVENT_CALL_OP_NO_RV:
+      log_debug (dev, "call op no rv");
+      ed->call_op_no_rv.op (vm, dev);
+      break;
+    case VNET_DEV_EVENT_CALL_OP_NO_WAIT:
+      log_debug (dev, "call op no wait");
+      ed->call_op_no_wait.op (vm, dev);
+      break;
+    case VNET_DEV_EVENT_CALL_PORT_OP:
+      log_debug (dev, "call port op");
+      rv = ed->call_port_op.op (vm, ed->call_port_op.port);
+      break;
+    case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV:
+      log_debug (dev, "call port op no rv");
+      ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port);
+      break;
+    case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT:
+      log_debug (dev, "call port op no wait");
+      ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port);
+      break;
+    default:
+      ASSERT (0);
+    }
+  return rv;
+}
+
+/* Per-device process node function.
+ * The device pointer is read from the node's runtime data. The loop waits
+ * for events (with a timeout when periodic operation is enabled), handles
+ * each queued event, replies to senders that asked for a reply, and then
+ * runs the periodic ops that are due. It ends once a quit event has set
+ * dev->process_node_quit; the node is then disabled, renamed and its index
+ * is put on the free list for reuse by vnet_dev_process_create (). */
+static uword
+vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_periodic_op_t *pop, *pops = 0;
+  f64 next = CLIB_F64_MAX;
+  vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed;
+
+  vnet_dev_t *dev =
+    *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index));
+
+  log_debug (dev, "process '%U' started", format_vlib_node_name, vm,
+            rt->node_index);
+
+  while (dev->process_node_quit == 0)
+    {
+      uword event_type;
+      /* sampled before the wait below and not refreshed afterwards */
+      f64 now = vlib_time_now (vm);
+
+      if (dev->process_node_periodic)
+       vlib_process_wait_for_event_or_clock (vm, next > now ? next - now : 0);
+      else
+       vlib_process_wait_for_event (vm);
+
+      new_event_data = vlib_process_get_event_data (vm, &event_type);
+
+      if (new_event_data)
+       {
+         /* copy events to a private vector before handing the buffer back */
+         vec_append (event_data, new_event_data);
+         vlib_process_put_event_data (vm, new_event_data);
+
+         ASSERT (event_type == 0);
+
+         vec_foreach (ed, event_data)
+           {
+             vnet_dev_rv_t rv;
+             rv = vnet_dev_process_one_event (vm, dev, ed);
+             /* reply carries the event id as type and rv as data */
+             if (ed->reply_needed)
+               vlib_process_signal_event (vm, ed->calling_process_index,
+                                          ed->event, rv);
+           }
+         vec_reset_length (event_data);
+       }
+
+      /* pick the periodic ops that are due and compute the next deadline */
+      next = CLIB_F64_MAX;
+      pool_foreach (pop, dev->periodic_ops)
+       {
+         if (pop->last_run + pop->interval < now)
+           {
+             vec_add1 (pops, *pop);
+             pop->last_run = now;
+           }
+         if (pop->last_run + pop->interval < next)
+           next = pop->last_run + pop->interval;
+       }
+
+      /* run from the copied vector, not from the pool itself */
+      vec_foreach (pop, pops)
+       {
+         switch (pop->type)
+           {
+           case VNET_DEV_PERIODIC_OP_TYPE_DEV:
+             pop->dev_op (vm, pop->dev);
+             break;
+           case VNET_DEV_PERIODIC_OP_TYPE_PORT:
+             pop->port_op (vm, pop->port);
+             break;
+           default:
+             ASSERT (0);
+           }
+       }
+      vec_reset_length (pops);
+    }
+
+  log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
+            rt->node_index);
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+
+  /* add node index to the freelist */
+  vec_add1 (dm->free_process_node_indices, rt->node_index);
+  vec_free (pops);
+  vec_free (event_data);
+  return 0;
+}
+
+/* Create (or reuse) the process node for a device and start it.
+ * A node index from dm->free_process_node_indices is reused when available:
+ * the node is renamed "<device_id>-process" and set to polling state.
+ * Otherwise a new process node is registered. The device pointer is stored
+ * in the node's runtime data and the node index in dev->process_node_index.
+ * Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vlib_node_t *n;
+  uword l;
+
+  l = vec_len (dm->free_process_node_indices);
+  if (l > 0)
+    {
+      /* take the last free node index */
+      n = vlib_get_node (vm, dm->free_process_node_indices[l - 1]);
+      if (n->function != vnet_dev_process)
+       {
+         vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, n->index);
+         n->function = vnet_dev_process;
+         rt->function = vnet_dev_process;
+       }
+      vlib_node_rename (vm, n->index, "%s-process", dev->device_id);
+      vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+      vec_set_len (dm->free_process_node_indices, l - 1);
+      log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
+                vm, n->index, n->index);
+    }
+  else
+    {
+      vlib_node_registration_t r = {
+       .function = vnet_dev_process,
+       .type = VLIB_NODE_TYPE_PROCESS,
+       .process_log2_n_stack_bytes = 16,
+       /* room for the vnet_dev_t pointer stored below */
+       .runtime_data_bytes = sizeof (void *),
+      };
+
+      vlib_register_node (vm, &r, "%s-process", dev->device_id);
+
+      n = vlib_get_node (vm, r.index);
+      log_debug (dev, "process node '%U' (%u) created", format_vlib_node_name,
+                vm, r.index, r.index);
+    }
+
+  dev->process_node_index = n->index;
+  *(vnet_dev_t **) vlib_node_get_runtime_data (vm, n->index) = dev;
+  vlib_start_process (vm, n->runtime_index);
+
+  return VNET_DEV_OK;
+}
+
+/* Queue one event record (copied by value) for the device's process node,
+ * using event type 0. Does not wait for the event to be handled. */
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+                            vnet_dev_event_data_t ed)
+{
+  vnet_dev_event_data_t *edp = vlib_process_signal_event_data (
+    vm, dev->process_node_index, 0, 1, sizeof (ed));
+  *edp = ed;
+}
+
+/* Send an event to the device's process node and wait for its reply.
+ * When called from the device's own process node the event is handled
+ * inline instead. Otherwise the caller waits for a reply event (timeout
+ * argument 5.0); the reply is expected to carry the original event id as
+ * its type and the result as its first data word.
+ * Returns the operation's status, or VNET_DEV_ERR_PROCESS_REPLY on timeout
+ * or when a different event arrives.
+ * NOTE(review): uses process wait primitives, so it presumably has to be
+ * called from a vlib process context - confirm. */
+static vnet_dev_rv_t
+vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                     vnet_dev_event_data_t ed)
+{
+  uword event, *event_data = 0;
+  vnet_dev_rv_t rv;
+
+  ed.calling_process_index = vlib_get_current_process_node_index (vm);
+
+  /* already running in the device's process: no round trip needed */
+  if (ed.calling_process_index == dev->process_node_index)
+    return vnet_dev_process_one_event (vm, dev, &ed);
+
+  ed.reply_needed = 1;
+  vnet_dev_process_event_send (vm, dev, ed);
+  vlib_process_wait_for_event_or_clock (vm, 5.0);
+  event = vlib_process_get_events (vm, &event_data);
+  if (event != ed.event)
+    {
+      log_err (dev, "%s",
+              event == VNET_DEV_EVENT_CLOCK ?
+                      "timeout waiting for process node to respond" :
+                      "unexpected event received");
+      rv = VNET_DEV_ERR_PROCESS_REPLY;
+    }
+  else
+    rv = event_data[0];
+  vec_free (event_data);
+  return rv;
+}
+
+/* Send VNET_DEV_EVENT_PROCESS_QUIT to the device process node and wait for
+ * the reply; the returned status is discarded. */
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_event_data_t quit_ev = {
+    .event = VNET_DEV_EVENT_PROCESS_QUIT,
+  };
+
+  (void) vnet_dev_process_event_send_and_wait (vm, dev, quit_ev);
+}
+
+/* Add periodic operation 'pop' to dev->periodic_ops.
+ *
+ * Returns 0 when an entry with the same op and arg already exists (nothing
+ * is added), 1 when the entry was added. When the added entry is the only
+ * one in the pool, VNET_DEV_EVENT_PERIODIC_START is sent to the device
+ * process node. */
+static int
+_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
+                    vnet_dev_periodic_op_t pop)
+{
+  vnet_dev_periodic_op_t *entry;
+
+  /* reject duplicates */
+  pool_foreach (entry, dev->periodic_ops)
+    {
+      if (entry->op == pop.op && entry->arg == pop.arg)
+        return 0;
+    }
+
+  pool_get_zero (dev->periodic_ops, entry);
+  entry[0] = pop;
+
+  if (pool_elts (dev->periodic_ops) == 1)
+    {
+      const vnet_dev_event_data_t start_ev = {
+        .event = VNET_DEV_EVENT_PERIODIC_START,
+      };
+
+      vnet_dev_process_event_send (vm, dev, start_ev);
+    }
+
+  return 1;
+}
+
+/* Remove the periodic operation matching (op, arg) from dev->periodic_ops.
+ *
+ * Returns 1 when a matching entry was found and removed, 0 otherwise. When
+ * the pool becomes empty, VNET_DEV_EVENT_PERIODIC_STOP is sent to the
+ * device process node. */
+static int
+_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
+{
+  vnet_dev_periodic_op_t *entry;
+
+  pool_foreach (entry, dev->periodic_ops)
+    {
+      if (entry->op != op || entry->arg != arg)
+        continue;
+
+      pool_put (dev->periodic_ops, entry);
+
+      if (pool_elts (dev->periodic_ops) == 0)
+        {
+          const vnet_dev_event_data_t stop_ev = {
+            .event = VNET_DEV_EVENT_PERIODIC_STOP,
+          };
+
+          vnet_dev_process_event_send (vm, dev, stop_ev);
+        }
+
+      /* iteration ends here, the pool is not walked after the removal */
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Register device-level periodic operation 'dev_op' on 'dev' with the given
+ * interval. A warning is logged when the same operation is already
+ * registered. */
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+                       vnet_dev_op_no_rv_t *dev_op)
+{
+  int added;
+  vnet_dev_periodic_op_t pop = {
+    .interval = interval,
+    .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+    .dev_op = dev_op,
+    .dev = dev,
+  };
+
+  added = _vnet_dev_poll_add (vm, dev, pop);
+
+  if (!added)
+    log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+/* Unregister device-level periodic operation 'dev_op' from 'dev'. A warning
+ * is logged when no such operation is registered. */
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+                          vnet_dev_op_no_rv_t *dev_op)
+{
+  int removed = _vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev);
+
+  if (!removed)
+    log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+/* Register port-level periodic operation 'port_op' for 'port' with the given
+ * interval. The entry is stored on the port's parent device. A warning is
+ * logged when the same operation is already registered. */
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+                        vnet_dev_port_op_no_rv_t *port_op)
+{
+  int added;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_periodic_op_t pop = {
+    .interval = interval,
+    .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+    .port_op = port_op,
+    .port = port,
+  };
+
+  added = _vnet_dev_poll_add (vm, dev, pop);
+
+  if (!added)
+    log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+/* Unregister port-level periodic operation 'port_op' of 'port' from the
+ * port's parent device. A warning is logged when no such operation is
+ * registered. */
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_port_op_no_rv_t *port_op)
+{
+  vnet_dev_t *dev = port->dev;
+  int removed =
+    _vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port);
+
+  if (!removed)
+    log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+/* Hand port config change request 'pccr' for 'port' to the device process
+ * node (VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ) and return the status
+ * obtained from vnet_dev_process_event_send_and_wait (). */
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+                                      vnet_dev_port_cfg_change_req_t *pccr)
+{
+  vnet_dev_t *dev = port->dev;
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+    .port_cfg_change.port = port,
+    .port_cfg_change.change_req = pccr,
+  };
+
+  return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+/* Request execution of device operation 'op' through the device process
+ * node (VNET_DEV_EVENT_CALL_OP) and return the resulting status. */
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_OP,
+    .call_op = { .op = op },
+  };
+  vnet_dev_rv_t rv = vnet_dev_process_event_send_and_wait (vm, dev, ed);
+
+  return rv;
+}
+
+/* Request execution of device operation 'op' (which itself has no return
+ * value) through the device process node (VNET_DEV_EVENT_CALL_OP_NO_RV).
+ * The returned status is the one produced by the send-and-wait call. */
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+                                vnet_dev_op_no_rv_t *op)
+{
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_OP_NO_RV,
+    .call_op_no_rv = { .op = op },
+  };
+  vnet_dev_rv_t rv = vnet_dev_process_event_send_and_wait (vm, dev, ed);
+
+  return rv;
+}
+
+/* Post a request to execute device operation 'op' to the device process
+ * node (VNET_DEV_EVENT_CALL_OP_NO_WAIT) and return without waiting. */
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                  vnet_dev_op_no_rv_t *op)
+{
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+    .call_op_no_rv = { .op = op },
+  };
+
+  vnet_dev_process_event_send (vm, dev, ed);
+}
+
+/* Request execution of port operation 'op' on 'port' through the process
+ * node of the port's device (VNET_DEV_EVENT_CALL_PORT_OP) and return the
+ * resulting status. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+                               vnet_dev_port_op_t *op)
+{
+  vnet_dev_t *dev = port->dev;
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_PORT_OP,
+    .call_port_op.op = op,
+    .call_port_op.port = port,
+  };
+
+  return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+/* Request execution of port operation 'op' (which itself has no return
+ * value) on 'port' through the process node of the port's device
+ * (VNET_DEV_EVENT_CALL_PORT_OP_NO_RV). The returned status is the one
+ * produced by the send-and-wait call. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+                                     vnet_dev_port_op_no_rv_t *op)
+{
+  vnet_dev_t *dev = port->dev;
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+    .call_port_op_no_rv.op = op,
+    .call_port_op_no_rv.port = port,
+  };
+
+  return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+/* Post a request to execute port operation 'op' on 'port' to the process
+ * node of the port's device (VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT) and
+ * return without waiting. */
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+                                       vnet_dev_port_op_no_rv_t *op)
+{
+  vnet_dev_t *dev = port->dev;
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+    .call_port_op_no_wait.op = op,
+    .call_port_op_no_wait.port = port,
+  };
+
+  vnet_dev_process_event_send (vm, dev, ed);
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644 (file)
index 0000000..9223973
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644 (file)
index 0000000..9a016a6
--- /dev/null
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+/* Log class used by the log_debug / log_err calls in this file. The subclass
+ * is named after this file (queue), matching how runtime.c names its own
+ * subclass "runtime", so queue messages are not attributed to "error". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "queue",
+};
+
+/* Release rx queue 'rxq': invoke the optional driver free callback, free the
+ * queue counters, return the queue's slot to port->rx_queues and free the
+ * queue memory. */
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *owner = rxq->port;
+  vnet_dev_t *dev = owner->dev;
+
+  log_debug (dev, "queue %u", rxq->queue_id);
+
+  /* driver callback is optional */
+  if (owner->rx_queue_ops.free != 0)
+    owner->rx_queue_ops.free (vm, rxq);
+
+  vnet_dev_rx_queue_free_counters (vm, rxq);
+  pool_put_index (owner->rx_queues, rxq->index);
+  clib_mem_free (rxq);
+}
+
+/* Allocate a new rx queue of 'queue_size' entries on 'port'.
+ *
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already holds
+ * attr.max_rx_queues queues. Otherwise the queue is allocated together with
+ * rx_queue_config.data_size bytes of driver data, registered in
+ * port->rx_queues, given default settings and passed to the optional driver
+ * alloc callback. If the driver rejects the queue it is freed again and the
+ * driver's error code is returned; on success VNET_DEV_OK is returned. */
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_rx_queue_t *rxq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u16 n_threads = vlib_get_n_threads ();
+  u8 buffer_pool_index;
+  vlib_buffer_pool_t *bp;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* size the allocation from the rx queue structure itself (it was
+   * previously sized from the unrelated vnet_dev_port_t) */
+  rxq = vnet_dev_alloc_with_data (sizeof (*rxq),
+                                  port->rx_queue_config.data_size);
+  pool_get (port->rx_queues, qp);
+  qp[0] = rxq;
+  rxq->enabled = 1;
+  rxq->port = port;
+  rxq->size = queue_size;
+  rxq->index = qp - port->rx_queues;
+
+  /* default queue id - can be changed by driver */
+  rxq->queue_id = qp - port->rx_queues;
+  ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
+  /* with more than one thread, hand out threads round-robin; the counter
+   * wraps back to 1 once it reaches n_threads */
+  if (n_threads > 1)
+    {
+      rxq->rx_thread_index = dm->next_rx_queue_thread++;
+      if (dm->next_rx_queue_thread >= n_threads)
+        dm->next_rx_queue_thread = 1;
+    }
+
+  /* buffer template comes from the default buffer pool of the device's
+   * numa node */
+  buffer_pool_index =
+    vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+  bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+  rxq->buffer_template = bp->buffer_template;
+  vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+  rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+  /* driver callback is optional */
+  if (port->rx_queue_ops.alloc)
+    rv = port->rx_queue_ops.alloc (vm, rxq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected rx queue add with rv %d", rv);
+      vnet_dev_rx_queue_free (vm, rxq);
+    }
+  else
+    log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+               rxq->rx_thread_index);
+
+  return rv;
+}
+
+/* Start rx queue 'rxq' via the optional driver start callback. The queue is
+ * marked started only when the result is VNET_DEV_OK (which is also the
+ * result when the driver provides no callback). */
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_rv_t status = VNET_DEV_OK;
+
+  if (rxq->port->rx_queue_ops.start != 0)
+    status = rxq->port->rx_queue_ops.start (vm, rxq);
+
+  if (status != VNET_DEV_OK)
+    return status;
+
+  rxq->started = 1;
+  return status;
+}
+
+/* Stop rx queue 'rxq': invoke the optional driver stop callback, set the
+ * port's rx node to VLIB_NODE_STATE_DISABLED on 'vm' and clear the started
+ * flag. */
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  u32 rx_node_index;
+
+  if (rxq->port->rx_queue_ops.stop != 0)
+    rxq->port->rx_queue_ops.stop (vm, rxq);
+
+  rx_node_index = rxq->port->intf.rx_node_index;
+  vlib_node_set_state (vm, rx_node_index, VLIB_NODE_STATE_DISABLED);
+  rxq->started = 0;
+}
+
+/* Release tx queue 'txq': invoke the optional driver free callback, free the
+ * assigned-threads bitmap and the queue counters, return the queue's slot
+ * to port->tx_queues and free the queue memory. */
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_port_t *owner = txq->port;
+  vnet_dev_t *dev = owner->dev;
+
+  vnet_dev_port_validate (vm, owner);
+
+  log_debug (dev, "queue %u", txq->queue_id);
+
+  /* driver callback is optional */
+  if (owner->tx_queue_ops.free != 0)
+    owner->tx_queue_ops.free (vm, txq);
+
+  clib_bitmap_free (txq->assigned_threads);
+  vnet_dev_tx_queue_free_counters (vm, txq);
+  pool_put_index (owner->tx_queues, txq->index);
+  clib_mem_free (txq);
+}
+
+/* Allocate a new tx queue of 'queue_size' entries on 'port'.
+ *
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already holds
+ * attr.max_tx_queues queues. Otherwise the queue is allocated together with
+ * tx_queue_config.data_size bytes of driver data, registered in
+ * port->tx_queues and passed to the optional driver alloc callback. If the
+ * driver rejects the queue it is freed again and the driver's error code is
+ * returned; on success VNET_DEV_OK is returned. */
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_tx_queue_t *txq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* size the allocation from the tx queue structure itself (it was
+   * previously sized from the unrelated vnet_dev_port_t) */
+  txq = vnet_dev_alloc_with_data (sizeof (*txq),
+                                  port->tx_queue_config.data_size);
+  pool_get (port->tx_queues, qp);
+  qp[0] = txq;
+  txq->enabled = 1;
+  txq->port = port;
+  txq->size = queue_size;
+  txq->index = qp - port->tx_queues;
+
+  /* default queue id - can be changed by driver */
+  txq->queue_id = qp - port->tx_queues;
+  ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+  /* driver callback is optional */
+  if (port->tx_queue_ops.alloc)
+    rv = port->tx_queue_ops.alloc (vm, txq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+      vnet_dev_tx_queue_free (vm, txq);
+    }
+  else
+    log_debug (dev, "queue %u added", txq->queue_id);
+
+  return rv;
+}
+
+/* Start tx queue 'txq' via the optional driver start callback. The queue is
+ * marked started only when the result is VNET_DEV_OK (which is also the
+ * result when the driver provides no callback). */
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_rv_t status = VNET_DEV_OK;
+
+  if (txq->port->tx_queue_ops.start != 0)
+    status = txq->port->tx_queue_ops.start (vm, txq);
+
+  if (status != VNET_DEV_OK)
+    return status;
+
+  txq->started = 1;
+  return status;
+}
+
+/* Stop tx queue 'txq': invoke the optional driver stop callback and clear
+ * the started flag. */
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (txq->port->tx_queue_ops.stop != 0)
+    {
+      txq->port->tx_queue_ops.stop (vm, txq);
+    }
+
+  txq->started = 0;
+}
+
+/* Allocate 'n_counters' counters described by 'counters' for rx queue 'rxq'
+ * and store the result in rxq->counter_main. The counter set is named after
+ * the device id, the port id and the queue id. */
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  vnet_dev_port_t *port = rxq->port;
+
+  rxq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u rx-queue %u counters",
+    port->dev->device_id, port->port_id, rxq->queue_id);
+}
+
+/* Free the counters of rx queue 'rxq'; does nothing when none were added. */
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  if (!rxq->counter_main)
+    return;
+
+  vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+/* Allocate 'n_counters' counters described by 'counters' for tx queue 'txq'
+ * and store the result in txq->counter_main. The counter set is named after
+ * the device id, the port id and the queue id. */
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  vnet_dev_port_t *port = txq->port;
+
+  txq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u tx-queue %u counters",
+    port->dev->device_id, port->port_id, txq->queue_id);
+}
+
+/* Free the counters of tx queue 'txq'; does nothing when none were added. */
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (txq->counter_main)
+    {
+      log_debug (txq->port->dev, "free");
+      vnet_dev_counters_free (vm, txq->counter_main);
+    }
+}
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
new file mode 100644 (file)
index 0000000..e8f96c4
--- /dev/null
@@ -0,0 +1,174 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+/* Log class used by the log_debug calls in this file. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .subclass_name = "runtime",
+  .class_name = "dev",
+};
+
+/* Vector of operations currently published for remote execution; 0 when
+ * nothing is pending. Written by vnet_dev_rt_exec_ops () and read by the
+ * dev-rt-mgmt node function, both using atomic accesses. */
+static vnet_dev_rt_op_t *rt_ops = 0;
+
+/* Execute a single runtime operation on the thread owning 'vm'.
+ *
+ * Only rx-queue operations are handled: START appends the queue to the rx
+ * node runtime's queue array, STOP removes it and closes the gap. The rx
+ * node is switched to polling when exactly one queue is present and is
+ * disabled when none is left. Finally the op is flagged completed with a
+ * release store. Ops of any other type are ignored (and not flagged). */
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+  vnet_dev_rx_node_runtime_t *rtd;
+  vnet_dev_rx_queue_t *rxq;
+  u32 node_index, pos;
+
+  if (op->type != VNET_DEV_RT_OP_TYPE_RX_QUEUE)
+    return;
+
+  rxq = op->rx_queue;
+  node_index = rxq->port->intf.rx_node_index;
+  rtd = vlib_node_get_runtime_data (vm, node_index);
+
+  switch (op->action)
+    {
+    case VNET_DEV_RT_OP_ACTION_START:
+      /* the queue must not be present yet */
+      for (pos = 0; pos < rtd->n_rx_queues; pos++)
+        ASSERT (rtd->rx_queues[pos] != rxq);
+      rtd->rx_queues[rtd->n_rx_queues++] = rxq;
+      break;
+
+    case VNET_DEV_RT_OP_ACTION_STOP:
+      /* locate the queue, then shift the remaining entries down by one */
+      pos = 0;
+      while (pos < rtd->n_rx_queues && rtd->rx_queues[pos] != rxq)
+        pos++;
+      ASSERT (pos < rtd->n_rx_queues);
+      rtd->n_rx_queues--;
+      while (pos < rtd->n_rx_queues)
+        {
+          rtd->rx_queues[pos] = rtd->rx_queues[pos + 1];
+          pos++;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  if (rtd->n_rx_queues == 0)
+    vlib_node_set_state (vm, node_index, VLIB_NODE_STATE_DISABLED);
+  else if (rtd->n_rx_queues == 1)
+    vlib_node_set_state (vm, node_index, VLIB_NODE_STATE_POLLING);
+
+  __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+/* Check whether 'current' may be executed now.
+ *
+ * Walks the ops located in [first, current) and returns 0 if any of them
+ * targets the same rx queue and is not completed yet, 1 otherwise. The
+ * 'completed' flag is set with a release store by _vnet_dev_rt_exec_op (),
+ * possibly on a different thread, so it is read with an acquire load
+ * instead of a plain (racy) read. */
+static int
+_vnet_dev_rt_op_not_occured_before (vnet_dev_rt_op_t *first,
+                                    vnet_dev_rt_op_t *current)
+{
+  for (vnet_dev_rt_op_t *op = first; op < current; op++)
+    if (op->rx_queue == current->rx_queue &&
+        __atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+      return 0;
+  return 1;
+}
+
+/* Node function of dev-rt-mgmt.
+ *
+ * Walks the currently published rt_ops vector and executes every op
+ * addressed to this thread, provided no earlier op on the same rx queue is
+ * still incomplete. If at least one op had to be postponed, the node
+ * re-arms its own interrupt so it runs again. Returns the number of ops
+ * executed. */
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  u16 my_thread = vm->thread_index;
+  vnet_dev_rt_op_t *pending = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+  vnet_dev_rt_op_t *cur;
+  uword n_executed = 0;
+  int retry = 0;
+
+  vec_foreach (cur, pending)
+    {
+      if (cur->thread_index != my_thread)
+        continue;
+
+      if (!_vnet_dev_rt_op_not_occured_before (pending, cur))
+        {
+          /* an earlier op on the same queue is still outstanding */
+          retry = 1;
+          continue;
+        }
+
+      _vnet_dev_rt_exec_op (vm, cur);
+      n_executed++;
+    }
+
+  if (retry)
+    vlib_node_set_interrupt_pending (vm, node->node_index);
+
+  return n_executed;
+}
+
+/* Registration of the dev-rt-mgmt node: a pre-input node in interrupt state
+ * whose interrupt is raised by vnet_dev_rt_exec_ops (). */
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+  .name = "dev-rt-mgmt",
+  .function = vnet_dev_rt_mgmt_node_fn,
+  .type = VLIB_NODE_TYPE_PRE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+/* format () helper printing a runtime op as
+ * "port <id> <type> <queue id> <action> on thread <n>".
+ * Takes a single vnet_dev_rt_op_t pointer from the argument list. */
+u8 *
+format_vnet_dev_mgmt_op (u8 *s, va_list *args)
+{
+  vnet_dev_rt_op_t *op = va_arg (*args, vnet_dev_rt_op_t *);
+  vnet_dev_rx_queue_t *rxq = op->rx_queue;
+
+  /* printable names, indexed by op type and op action */
+  char *type_names[] = {
+    [VNET_DEV_RT_OP_TYPE_RX_QUEUE] = "rx queue",
+  };
+  char *action_names[] = {
+    [VNET_DEV_RT_OP_ACTION_START] = "start",
+    [VNET_DEV_RT_OP_ACTION_STOP] = "stop",
+  };
+
+  s = format (s, "port %u %s %u %s on thread %u", rxq->port->port_id,
+              type_names[op->type], rxq->queue_id, action_names[op->action],
+              op->thread_index);
+  return s;
+}
+
+/* Execute 'n_ops' runtime operations from 'ops'.
+ *
+ * An op is executed directly when the worker barrier is held, or when it
+ * targets the calling thread and no earlier op on the same rx queue is
+ * still pending. All other ops are copied into a vector which is published
+ * through rt_ops; the dev-rt-mgmt node is then interrupted on every target
+ * thread and this function spins until each published op is flagged
+ * completed. Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+                      u32 n_ops)
+{
+  vnet_dev_rt_op_t *op;
+  vnet_dev_rt_op_t *remote_ops = 0;
+  clib_bitmap_t *remote_bmp = 0;
+  u32 i;
+
+  /* only one batch of remote ops may be published at a time */
+  ASSERT (rt_ops == 0);
+
+  for (op = ops; op < (ops + n_ops); op++)
+    {
+      vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+
+      if ((vlib_worker_thread_barrier_held ()) ||
+          (op->thread_index == vm->thread_index &&
+           _vnet_dev_rt_op_not_occured_before (ops, op)))
+        {
+          _vnet_dev_rt_exec_op (tvm, op);
+          log_debug (dev, "%U executed locally", format_vnet_dev_mgmt_op, op);
+          continue;
+        }
+
+      vec_add1 (remote_ops, *op);
+      log_debug (dev, "%U enqueued for remote execution",
+                 format_vnet_dev_mgmt_op, op);
+      remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+    }
+
+  if (remote_ops == 0)
+    return VNET_DEV_OK;
+
+  /* publish the vector; pairs with the acquire load in the mgmt node */
+  __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+  clib_bitmap_foreach (i, remote_bmp)
+    {
+      vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+                                       vnet_dev_rt_mgmt_node.index);
+      log_debug (dev, "interrupt sent to %s node on thread %u",
+                 vnet_dev_rt_mgmt_node.name, i);
+    }
+
+  /* 'completed' is written by other threads with a release store, so it
+   * must be read atomically: a plain read is a data race and could be
+   * hoisted out of the loop by the compiler */
+  vec_foreach (op, remote_ops)
+    {
+      while (__atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+        CLIB_PAUSE ();
+    }
+
+  __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+  vec_free (remote_ops);
+  clib_bitmap_free (remote_bmp);
+  return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644 (file)
index 0000000..1a82c97
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+typedef char vnet_dev_device_id_t[32]; /* fixed 32-byte char array for a device id */
+typedef char vnet_dev_if_name_t[32]; /* fixed 32-byte char array for an interface name */
+typedef char vnet_dev_driver_name_t[16]; /* fixed 16-byte char array for a driver name */
+typedef char vnet_dev_bus_name_t[8]; /* fixed 8-byte char array for a bus name */
+typedef u16 vnet_dev_port_id_t; /* 16-bit port identifier */
+typedef struct vnet_dev vnet_dev_t; /* forward declaration, struct body not in this header */
+typedef struct vnet_dev_port vnet_dev_port_t; /* forward declaration, struct body not in this header */
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t; /* forward declaration, struct body not in this header */
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t; /* forward declaration, struct body not in this header */
+
+typedef enum /* helper enum: 0 is OK, every entry of foreach_vnet_dev_rv_type
+              * (presumably defined in vnet/dev/errors.h) gets the next
+              * positive value as VNET_DEV_ERR_MINUS_<name> */
+{
+  VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+typedef enum /* return value type: VNET_DEV_OK is 0, every VNET_DEV_ERR_<name>
+              * is the negated VNET_DEV_ERR_MINUS_<name> helper value, so all
+              * error codes are negative */
+{
+  VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_flag _ (3, NO_STATS, "don't poll device stats") /* _ (bit, name, description) */
+
+typedef union /* device flags: 'e' provides the named constants
+               * VNET_DEV_F_<name> = 1 << bit, 'n' the same storage viewed as
+               * a raw 64-bit value */
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  } e;
+  u64 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_port_flag                                            \
+  _ (3, INTERRUPT_MODE, "enable interrupt mode") /* _ (bit, name, description) */
+
+typedef union /* port flags: 'e' provides the named constants
+               * VNET_DEV_PORT_F_<name> = 1 << bit, 'n' the same storage
+               * viewed as a raw 64-bit value */
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  } e;
+  u64 n;
+} vnet_dev_port_flags_t;
+
+#endif /* _VNET_DEV_TYPES_H_ */
index 51aba13..0ece84f 100644 (file)
@@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
              vnet_feature_enable_disable ("device-input",
                                           "p2p-ethernet-input",
                                           parent_if_index, 1, 0, 0);
+             vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input",
+                                          parent_if_index, 1, 0, 0);
              /* Set promiscuous mode on the l2 interface */
              ethernet_set_flags (vnm, parent_if_index,
                                  ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
@@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
                  vnet_feature_enable_disable ("device-input",
                                               "p2p-ethernet-input",
                                               parent_if_index, 0, 0, 0);
+                 vnet_feature_enable_disable ("port-rx-eth",
+                                              "p2p-ethernet-input",
+                                              parent_if_index, 0, 0, 0);
                  /* Disable promiscuous mode on the l2 interface */
                  ethernet_set_flags (vnm, parent_if_index, 0);
                }
index 5d4ef6f..61ca30f 100644 (file)
@@ -244,6 +244,8 @@ interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index,
 
   vnet_feature_enable_disable ("device-input", "worker-handoff",
                               sw_if_index, enable_disable, 0, 0);
+  vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index,
+                              enable_disable, 0, 0);
   return rv;
 }
 
index 02d8099..511df49 100644 (file)
@@ -483,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags;
 format_function_t format_vtr;
 
 /* Node runtime for interface output function. */
+struct vnet_dev_tx_queue;
 typedef struct
 {
   u32 hw_if_index;
   u32 sw_if_index;
   u32 dev_instance;
-  u32 is_deleted;
+  u8 is_deleted;
+  struct vnet_dev_tx_queue *tx_queue;
 } vnet_interface_output_runtime_t;
 
 /* Interface output function. */
index 6de4e50..5697fb6 100644 (file)
@@ -270,6 +270,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
 
       vnet_feature_enable_disable ("device-input", "l2-patch",
                                   rxhi->sw_if_index, 1, 0, 0);
+      vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+                                  rxhi->sw_if_index, 1, 0, 0);
     }
   else
     {
@@ -278,6 +280,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
 
       vnet_feature_enable_disable ("device-input", "l2-patch",
                                   rxhi->sw_if_index, 0, 0, 0);
+      vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+                                  rxhi->sw_if_index, 0, 0, 0);
       if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
        {
          l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
index ec47920..85ee0c2 100644 (file)
@@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm,
       if (enable_rx || disable_rx)
        vnet_feature_enable_disable ("device-input", "span-input",
                                     src_sw_if_index, rx, 0, 0);
+      if (enable_rx || disable_rx)
+       vnet_feature_enable_disable ("port-rx-eth", "span-input",
+                                    src_sw_if_index, rx, 0, 0);
       if (enable_tx || disable_tx)
        vnet_feature_enable_disable ("interface-output", "span-output",
                                     src_sw_if_index, tx, 0, 0);
index 4221796..ad85af3 100644 (file)
@@ -131,6 +131,9 @@ typedef u32 clib_address_t;
 #define CLIB_U32_MAX __UINT32_MAX__
 #define CLIB_U64_MAX __UINT64_MAX__
 
+#define CLIB_F64_MAX __DBL_MAX__
+#define CLIB_F32_MAX __FLT_MAX__
+
 #if clib_address_bits == 64
 #define CLIB_WORD_MAX  CLIB_I64_MAX
 #define CLIB_UWORD_MAX CLIB_U64_MAX
@@ -197,11 +200,17 @@ typedef word wordu __attribute__ ((aligned (1), __may_alias__));
 typedef uword uwordu __attribute__ ((aligned (1), __may_alias__));
 
 #define foreach_int(__var, ...)                                               \
-  for (int __int_array[] = { __VA_ARGS__ }, *__int_ptr = __int_array,         \
+  for (int __int_array[] = { __VA_ARGS__, 0 }, *__int_ptr = __int_array,      \
           __var = *__int_ptr;                                                \
-       __int_ptr - ARRAY_LEN (__int_array) < __int_array;                     \
+       __int_ptr - (ARRAY_LEN (__int_array) - 1) < __int_array;               \
        __var = *++__int_ptr)
 
+#define foreach_pointer(__var, ...)                                           \
+  for (void *__ptr_array[] = { __VA_ARGS__, 0 }, **__ptr_ptr = __ptr_array,   \
+           *__var = *__ptr_ptr;                                              \
+       __ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array;               \
+       __var = *++__ptr_ptr)
+
 #endif /* included_clib_types_h */
 
 /*
index 5c827d4..5ea7a80 100644 (file)
@@ -239,8 +239,9 @@ _vec_set_len (void *v, uword len, uword elt_sz)
     for ((var) = vec_len ((v)) - 1; (var) >= 0; (var)--)
 
 #define vec_foreach_pointer(e, v)                                             \
-  for (typeof (**v) **__ep = (v), *(e) = *__ep; __ep - (v) < vec_len (v);     \
-       __ep++, (e) = *__ep)
+  if (v)                                                                      \
+    for (typeof (**v) **__ep = (v), *(e) = *__ep; __ep - (v) < vec_len (v);   \
+        __ep++, (e) = *__ep)
 
 #endif /* included_clib_vec_bootstrap_h */