ige: native driver for Intel Gigabit Adapters (i211, i225, i226) 81/41081/16
author Damjan Marion <[email protected]>
Thu, 2 Oct 2025 12:01:39 +0000 (14:01 +0200)
committer Mohammed HAWARI <[email protected]>
Fri, 3 Oct 2025 12:30:52 +0000 (12:30 +0000)
Type: feature
Change-Id: I79bd1111fdfc777843de917ed061c8e818e20d2e
Signed-off-by: Damjan Marion <[email protected]>
14 files changed:
MAINTAINERS
docs/spelling_wordlist.txt
src/plugins/dev_ige/CMakeLists.txt [new file with mode: 0644]
src/plugins/dev_ige/counters.c [new file with mode: 0644]
src/plugins/dev_ige/format.c [new file with mode: 0644]
src/plugins/dev_ige/ige.c [new file with mode: 0644]
src/plugins/dev_ige/ige.h [new file with mode: 0644]
src/plugins/dev_ige/ige_regs.h [new file with mode: 0644]
src/plugins/dev_ige/phy.c [new file with mode: 0644]
src/plugins/dev_ige/port.c [new file with mode: 0644]
src/plugins/dev_ige/queue.c [new file with mode: 0644]
src/plugins/dev_ige/reg.c [new file with mode: 0644]
src/plugins/dev_ige/rx_node.c [new file with mode: 0644]
src/plugins/dev_ige/tx_node.c [new file with mode: 0644]

index b00762b..e4c766a 100644 (file)
@@ -414,6 +414,11 @@ I: iavf
 M:     Damjan Marion <[email protected]>
 F:     src/plugins/dev_iavf/
 
+Plugin - IGE Device driver
+I:     ige
+M:     Damjan Marion <[email protected]>
+F:     src/plugins/dev_ige/
+
 Plugin - Amazon Elastic Network Adapter (ENA) device driver
 I:     ena
 M:     Damjan Marion <[email protected]>
index cd9d750..4e22435 100644 (file)
@@ -251,6 +251,7 @@ det
 dev
 devbind
 dev_iavf
+dev_ige
 dev_octeon
 df
 dhcp
@@ -486,6 +487,7 @@ ietf
 iface
 ifndef
 igb
+ige
 igmp
 ikev
 Ikev
diff --git a/src/plugins/dev_ige/CMakeLists.txt b/src/plugins/dev_ige/CMakeLists.txt
new file mode 100644 (file)
index 0000000..fc9d033
--- /dev/null
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2024 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_ige
+  SOURCES
+  counters.c
+  ige.c
+  format.c
+  phy.c
+  port.c
+  queue.c
+  reg.c
+  rx_node.c
+  tx_node.c
+
+  MULTIARCH_SOURCES
+  rx_node.c
+  tx_node.c
+)
+
diff --git a/src/plugins/dev_ige/counters.c b/src/plugins/dev_ige/counters.c
new file mode 100644 (file)
index 0000000..5510b38
--- /dev/null
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#define _(hi, lo) ((u64) hi << 32 | lo) /* pack two 32-bit values in a u64 */
+static vnet_dev_counter_t ige_port_counters[] = { /* Port-wide counters. A
+  * plain value is one register offset; _ (hi, lo) names the two register
+  * offsets that ige_port_counter_poll () reads as the upper and lower 32
+  * bits of the counter. */
+  VNET_DEV_CTR_RX_BYTES (_ (0x40c4, 0x40c0)),
+  VNET_DEV_CTR_TX_BYTES (_ (0x40cc, 0x40c8)),
+  VNET_DEV_CTR_RX_PACKETS (0x40d0),
+  VNET_DEV_CTR_TX_PACKETS (0x40d4),
+  VNET_DEV_CTR_VENDOR (_ (0x408c, 0x4088), RX, BYTES, "good"),
+  VNET_DEV_CTR_VENDOR (_ (0x4094, 0x4090), TX, BYTES, "good"),
+  VNET_DEV_CTR_VENDOR (_ (0x412c, 0x4128), RX, BYTES, "host good"),
+  VNET_DEV_CTR_VENDOR (_ (0x4134, 0x4130), TX, BYTES, "host good"),
+  VNET_DEV_CTR_VENDOR (0x4104, RX, PACKETS, "host"),
+  VNET_DEV_CTR_VENDOR (0x4000, RX, PACKETS, "CRC error"),
+  VNET_DEV_CTR_VENDOR (0x4010, RX, PACKETS, "missed"),
+  VNET_DEV_CTR_VENDOR (0x405c, RX, PACKETS, "64 bytes"),
+  VNET_DEV_CTR_VENDOR (0x4060, RX, PACKETS, "65-127 byte"),
+  VNET_DEV_CTR_VENDOR (0x4064, RX, PACKETS, "128-255 byte"),
+  VNET_DEV_CTR_VENDOR (0x4068, RX, PACKETS, "256-511 byte"),
+  VNET_DEV_CTR_VENDOR (0x406c, RX, PACKETS, "512-1023 byte"),
+  VNET_DEV_CTR_VENDOR (0x4070, RX, PACKETS, ">=1024 byte"),
+  VNET_DEV_CTR_VENDOR (0x4074, RX, PACKETS, "good"),
+  VNET_DEV_CTR_VENDOR (0x4078, RX, PACKETS, "broadcast"),
+  VNET_DEV_CTR_VENDOR (0x407c, RX, PACKETS, "multicast"),
+  VNET_DEV_CTR_VENDOR (0x40d8, TX, PACKETS, "64 bytes"),
+  VNET_DEV_CTR_VENDOR (0x40dc, TX, PACKETS, "65-127 byte"),
+  VNET_DEV_CTR_VENDOR (0x40e0, TX, PACKETS, "128-255 byte"),
+  VNET_DEV_CTR_VENDOR (0x40e4, TX, PACKETS, "256-511 byte"),
+  VNET_DEV_CTR_VENDOR (0x40e8, TX, PACKETS, "512-1023 byte"),
+  VNET_DEV_CTR_VENDOR (0x40ec, TX, PACKETS, ">=1024 byte"),
+  VNET_DEV_CTR_VENDOR (0x40f0, TX, PACKETS, "multicast"),
+  VNET_DEV_CTR_VENDOR (0x40f4, TX, PACKETS, "broadcast"),
+  VNET_DEV_CTR_VENDOR (0x4108, NA, NA, "debug counter 1"),
+  VNET_DEV_CTR_VENDOR (0x410c, NA, NA, "debug counter 2"),
+  VNET_DEV_CTR_VENDOR (0x4110, NA, NA, "debug counter 3"),
+  VNET_DEV_CTR_VENDOR (0x411c, NA, NA, "debug counter 4"),
+};
+
+vnet_dev_counter_t ige_rxq_counters[] = { /* Per-rx-queue counters. Here
+  * _ (stride, base) is used differently: ige_port_counter_poll () reads
+  * the register at base + stride * queue_id. */
+  VNET_DEV_CTR_RX_PACKETS (_ (0x100, 0x10010)),
+  VNET_DEV_CTR_RX_BYTES (_ (0x100, 0x10018)),
+  VNET_DEV_CTR_RX_DROPS (_ (0x40, 0xc030)),
+  VNET_DEV_CTR_VENDOR (_ (0x100, 0x10038), RX, PACKETS, "multicast"),
+};
+
+vnet_dev_counter_t ige_txq_counters[] = { /* Per-tx-queue counters, same
+  * _ (stride, base) encoding as ige_rxq_counters. */
+  VNET_DEV_CTR_TX_PACKETS (_ (0x100, 0x10014)),
+  VNET_DEV_CTR_TX_BYTES (_ (0x100, 0x10034)),
+  VNET_DEV_CTR_TX_DROPS (_ (0x40, 0xe030)),
+};
+#undef _
+
+/* Attach the ige_port_counters table to the port and the ige_rxq_counters /
+ * ige_txq_counters tables to each of its rx / tx queues. Always returns 0. */
+vnet_dev_rv_t
+ige_port_counters_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_add_counters (vm, port, ige_port_counters,
+                              ARRAY_LEN (ige_port_counters));
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      vnet_dev_rx_queue_add_counters (vm, rxq, ige_rxq_counters,
+                                      ARRAY_LEN (ige_rxq_counters));
+    }
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      vnet_dev_tx_queue_add_counters (vm, txq, ige_txq_counters,
+                                      ARRAY_LEN (ige_txq_counters));
+    }
+
+  return 0;
+}
+
+/* Read the statistics registers named by each counter's user_data and feed
+ * the values to the counter framework: port counters are accumulated with
+ * vnet_dev_counter_value_add (), per-queue counters (started queues only)
+ * are set with vnet_dev_counter_value_update (). */
+void
+ige_port_counter_poll (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+
+  /* port counters: bits 31:0 of user_data hold the low register offset,
+   * bits 63:32 (when non-zero) the offset of the high 32 bits */
+  foreach_vnet_dev_counter (c, port->counter_main)
+    {
+      u64 packed = c->user_data;
+      u32 lo_reg = (u32) packed;
+      u64 hi_reg = packed >> 32;
+      u32 lo, hi = 0;
+
+      ige_reg_rd (dev, lo_reg, &lo);
+      if (hi_reg)
+        ige_reg_rd (dev, (u32) hi_reg, &hi);
+
+      vnet_dev_counter_value_add (vm, c, (u64) hi << 32 | lo);
+    }
+
+  /* queue counters: register = base (bits 31:0) + stride (bits 63:32) *
+   * queue_id */
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      if (rxq->started)
+        {
+          foreach_vnet_dev_counter (c, rxq->counter_main)
+            {
+              u64 packed = c->user_data;
+              u32 reg = (u32) packed + (packed >> 32) * rxq->queue_id;
+              u32 val;
+
+              ige_reg_rd (dev, reg, &val);
+              vnet_dev_counter_value_update (vm, c, val);
+            }
+        }
+    }
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      if (txq->started)
+        {
+          foreach_vnet_dev_counter (c, txq->counter_main)
+            {
+              u64 packed = c->user_data;
+              u32 reg = (u32) packed + (packed >> 32) * txq->queue_id;
+              u32 val;
+
+              ige_reg_rd (dev, reg, &val);
+              vnet_dev_counter_value_update (vm, c, val);
+            }
+        }
+    }
+}
diff --git a/src/plugins/dev_ige/format.c b/src/plugins/dev_ige/format.c
new file mode 100644 (file)
index 0000000..07913cc
--- /dev/null
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ige/ige.h>
+#include <dev_ige/ige_regs.h>
+
+/* Pretty-print one register value together with its bit-fields.
+ *
+ * s        vector to append to
+ * offset   register offset; selects the matching entry of foreach_ige_reg
+ * val      register value to decode
+ * no_zero  when non-zero, fields whose value is 0 are skipped; when zero,
+ *          zero-valued fields are printed unless their name starts with '_'
+ * mask     only fields overlapping this bit mask are printed
+ *
+ * Returns the (possibly reallocated) vector. Nothing is appended when
+ * offset matches no entry of foreach_ige_reg.
+ */
+static u8 *
+_format_ige_reg (u8 *s, u32 offset, u32 val, int no_zero, u32 mask)
+{
+  u32 indent = format_get_indent (s);
+  u32 rv = 0, f, v;
+  u8 *s2 = 0;
+  int line = 0;
+
+  /* per-register header line; `m` expands to the register's field list */
+#define _(o, rn, m)                                                           \
+  if (offset == o)                                                            \
+    {                                                                         \
+      if (line++)                                                             \
+       s = format (s, "\n%U", format_white_space, indent);                   \
+      vec_reset_length (s2);                                                  \
+      s2 = format (s2, "[0x%05x] %s:", o, #rn);                               \
+      rv = val;                                                               \
+      s = format (s, "%-32v = 0x%08x", s2, rv);                               \
+      f = 0;                                                                  \
+      m                                                                       \
+    }
+
+  /* one field of width l starting at the running bit position f */
+#define __(l, fn)                                                             \
+  v = (rv >> f) & pow2_mask (l);                                              \
+  if ((pow2_mask (l) << f) & mask)                                            \
+    if (v || (!no_zero && #fn[0] != '_'))                                     \
+      {                                                                       \
+       vec_reset_length (s2);                                                \
+       s = format (s, "\n%U", format_white_space, indent + 2);               \
+       s2 = format (s2, "[%2u:%2u] %s", f + l - 1, f, #fn);                  \
+       s = format (s, "%-30v = ", s2);                                       \
+       if (l < 3)                                                            \
+         s = format (s, "%u", v);                                            \
+       else if (l <= 8)                                                      \
+         s = format (s, "0x%02x (%u)", v, v);                                \
+       else if (l <= 16)                                                     \
+         s = format (s, "0x%04x", v);                                        \
+       else                                                                  \
+         s = format (s, "0x%08x", v);                                        \
+      }                                                                       \
+  f += l;
+
+  foreach_ige_reg;
+#undef _
+  /* do not leak the field helper into the rest of the file */
+#undef __
+
+  vec_free (s2);
+
+  return s;
+}
+
+/* format args: u32 offset, u32 value. Prints every field of the register,
+ * including zero-valued ones (except those named with a leading '_'). */
+u8 *
+format_ige_reg_read (u8 *s, va_list *args)
+{
+  u32 reg_offset = va_arg (*args, u32);
+  u32 reg_value = va_arg (*args, u32);
+  const u32 all_bits = 0xffffffff;
+
+  return _format_ige_reg (s, reg_offset, reg_value, /* no_zero */ 0,
+                          all_bits);
+}
+
+/* format args: u32 offset, u32 value. Prints only the non-zero fields. */
+u8 *
+format_ige_reg_write (u8 *s, va_list *args)
+{
+  u32 reg_offset = va_arg (*args, u32);
+  u32 reg_value = va_arg (*args, u32);
+  const u32 all_bits = 0xffffffff;
+
+  return _format_ige_reg (s, reg_offset, reg_value, /* no_zero */ 1,
+                          all_bits);
+}
+
+/* format args: u32 offset, u32 old value, u32 new value. Prints the new
+ * value and only those fields that overlap a bit which changed. */
+u8 *
+format_ige_reg_diff (u8 *s, va_list *args)
+{
+  u32 reg_offset = va_arg (*args, u32);
+  u32 before = va_arg (*args, u32);
+  u32 after = va_arg (*args, u32);
+  u32 changed_bits = before ^ after;
+
+  return _format_ige_reg (s, reg_offset, after, /* no_zero */ 0,
+                          changed_bits);
+}
+
+/* format args: u32 rss_type. Prints the symbolic name for values 0..9 and
+ * the raw value in hex for anything else. */
+static u8 *
+format_ige_rss_type (u8 *s, va_list *args)
+{
+  /* indexed by the descriptor's rss_type value */
+  static const char *names[] = {
+    "none",             /* 0x0 */
+    "HASH_TCP_IPV4",    /* 0x1 */
+    "HASH_IPV4",        /* 0x2 */
+    "HASH_TCP_IPV6",    /* 0x3 */
+    "HASH_IPV6_EX",     /* 0x4 */
+    "HASH_IPV6",        /* 0x5 */
+    "HASH_TCP_IPV6_EX", /* 0x6 */
+    "HASH_UDP_IPV4",    /* 0x7 */
+    "HASH_UDP_IPV6",    /* 0x8 */
+    "HASH_UDP_IPV6_EX", /* 0x9 */
+  };
+  u32 type = va_arg (*args, u32);
+  const char *name = type < ARRAY_LEN (names) ? names[type] : 0;
+
+  if (name == 0)
+    return format (s, "0x%x", type);
+
+  return format (s, "%s", name);
+}
+
+/* format args: vnet_dev_format_args_t * (unused), vnet_dev_port_t *.
+ * Prints link state and, when the link is up, speed and duplex taken from
+ * the port's last_status. */
+u8 *
+format_ige_port_status (u8 *s, va_list *args)
+{
+  static const u32 speed_mbps[] = { 10, 100, 1000 };
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+  ige_port_t *ip = vnet_dev_get_port_data (port);
+  ige_device_t *id = vnet_dev_get_data (port->dev);
+  u32 mbps = 0;
+
+  if (!ip->last_status.link_up)
+    return format (s, "Link down");
+
+  /* 2.5G is only reported on devices configured as supporting it;
+   * an out-of-table speed code is printed as 0 */
+  if (id->config.supports_2_5g && ip->last_status.speed_2p5)
+    mbps = 2500;
+  else if (ip->last_status.speed < ARRAY_LEN (speed_mbps))
+    mbps = speed_mbps[ip->last_status.speed];
+
+  return format (s, "Link up, speed %u Mbps, duplex %s", mbps,
+                 ip->last_status.full_duplex ? "full" : "half");
+}
+
+/* format args: const ige_rx_desc_t *. Dumps the status view of an rx
+ * descriptor over several lines: lengths / RSS info, packet type flags,
+ * ext_status and ext_error. Boolean flags are printed as name+ or name-.
+ * The second group of ext_status flags is printed only when eop is set and
+ * hbo only when sph is set.
+ */
+u8 *
+format_ige_rx_desc (u8 *s, va_list *args)
+{
+  const ige_rx_desc_t *d = va_arg (*args, const ige_rx_desc_t *);
+  u32 indent = format_get_indent (s) + 2;
+  /* header length is split across two bit-fields */
+  u32 hdr_len = (d->hdr_len_hi << 10) | d->hdr_len_lo;
+
+#define _(b) ((b) ? '+' : '-')
+
+  /* vlan tag is printed in hex to match its 0x prefix (was "0x%u") */
+  s = format (
+    s, "pkt_len %u vlan 0x%x hdr_len %u sph%c rss_type %U rss_hash 0x%08x",
+    d->pkt_len, d->vlan_tag, hdr_len, _ (d->sph), format_ige_rss_type,
+    d->rss_type, d->rss_hash);
+  s = format (s,
+             "\n%Upacket_type: ip4%c ip4e%c ip6%c ip6e%c tcp%c udp%c sctp%c "
+             "nfs%c etqf %u l2pkt%c vpkt%c",
+             format_white_space, indent, _ (d->ipv4), _ (d->ipv4e),
+             _ (d->ipv6), _ (d->ipv6e), _ (d->tcp), _ (d->udp), _ (d->sctp),
+             _ (d->nfs), d->etqf, _ (d->l2pkt), _ (d->vpkt));
+
+  s = format (s, "\n%Uext_status: dd%c eop%c", format_white_space, indent,
+             _ (d->dd), _ (d->eop));
+
+  if (d->eop)
+    {
+      s = format (s, " vp%c udpcs%c l4i%c ipcs%c pif%c", _ (d->vp),
+                 _ (d->udpcs), _ (d->l4i), _ (d->ipcs), _ (d->pif));
+      s = format (s,
+                 " vext%c udpv%c llint%c strip_crc%c smd_type %u tsip%c mc%c",
+                 _ (d->vext), _ (d->udpv), _ (d->llint), _ (d->strip_crc),
+                 (u32) d->smd_type, _ (d->tsip), _ (d->mc));
+    }
+
+  s = format (s, "\n%Uext_error: l4e%c ipe%c rxe%c", format_white_space,
+             indent, _ (d->l4e), _ (d->ipe), _ (d->rxe));
+  if (d->sph)
+    s = format (s, " hbo%c", _ (d->hbo));
+
+#undef _
+
+  return s;
+}
+
+/* Trace formatter for the rx node. format args: vlib_main_t *,
+ * vlib_node_t *, ige_rx_trace_t *. Prints interface, queue, next node and
+ * buffer index, then the captured descriptor on the following lines. */
+u8 *
+format_ige_rx_trace (u8 *s, va_list *args)
+{
+  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t *node = va_arg (*args, vlib_node_t *);
+  ige_rx_trace_t *tr = va_arg (*args, ige_rx_trace_t *);
+  u32 desc_indent = format_get_indent (s) + 2;
+  vnet_hw_interface_t *hw =
+    vnet_get_hw_interface (vnet_get_main (), tr->hw_if_index);
+
+  s = format (s, "ige: %v (%u) qid %u next-node %U buffer %u", hw->name,
+              tr->hw_if_index, tr->queue_id, format_vlib_next_node_name, vm,
+              node->index, tr->next_index, tr->buffer_index);
+
+  return format (s, "\n%Udesc: %U", format_white_space, desc_indent,
+                 format_ige_rx_desc, &tr->desc);
+}
+
+/* format args: const ige_tx_desc_t *. Dumps a tx descriptor on three lines:
+ * address / lengths / type, command flags, and status. Boolean flags are
+ * printed as name+ or name-. */
+u8 *
+format_ige_tx_desc (u8 *s, va_list *args)
+{
+  const ige_tx_desc_t *desc = va_arg (*args, const ige_tx_desc_t *);
+  u32 sub_indent = format_get_indent (s) + 2;
+
+#define flag_chr(b) ((b) ? '+' : '-')
+
+  s = format (
+    s,
+    "addr 0x%016llx dtalen %u paylen %u dtyp 0x%x ptp1 %u ptp2 %u popts 0x%x",
+    desc->addr, desc->dtalen, desc->paylen, desc->dtyp, desc->ptp1,
+    desc->ptp2, desc->popts);
+
+  s = format (s, "\n%Uflags: eop%c ifcs%c rs%c dext%c vle%c tse%c idx%c",
+              format_white_space, sub_indent, flag_chr (desc->eop),
+              flag_chr (desc->ifcs), flag_chr (desc->rs),
+              flag_chr (desc->dext), flag_chr (desc->vle),
+              flag_chr (desc->tse), flag_chr (desc->idx));
+
+  s = format (s, "\n%Ustatus: dd%c ts_stat%c sta 0x%x", format_white_space,
+              sub_indent, flag_chr (desc->dd), flag_chr (desc->ts_stat),
+              desc->sta);
+
+#undef flag_chr
+
+  return s;
+}
+
+/* Trace formatter for the tx node. format args: vlib_main_t * (unused),
+ * vlib_node_t * (unused), ige_tx_trace_t *. Prints interface, queue and
+ * buffer index, then the captured descriptor on the following lines. */
+u8 *
+format_ige_tx_trace (u8 *s, va_list *args)
+{
+  vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *);
+  ige_tx_trace_t *tr = va_arg (*args, ige_tx_trace_t *);
+  u32 desc_indent = format_get_indent (s) + 2;
+  vnet_hw_interface_t *hw =
+    vnet_get_hw_interface (vnet_get_main (), tr->hw_if_index);
+
+  s = format (s, "ige-tx: %v (%u) qid %u buffer %u", hw->name,
+              tr->hw_if_index, tr->queue_id, tr->buffer_index);
+
+  return format (s, "\n%Udesc: %U", format_white_space, desc_indent,
+                 format_ige_tx_desc, &tr->desc);
+}
+/* format args: vnet_dev_t *. Reads the 16 receive address register pairs
+ * (RAH/RAL) from the device and prints one line per entry that has its
+ * `av` bit set. Entries are separated by a newline plus the current
+ * indent; nothing is emitted before the first printed entry, whichever
+ * index it has.
+ */
+u8 *
+format_ige_receive_addr_table (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  u32 indent = format_get_indent (s);
+  u32 n_printed = 0;
+
+  for (int i = 0; i < 16; i++)
+    {
+      ige_receive_addr_t ra;
+      ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah);
+      ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral);
+      if (!ra.av)
+        continue;
+
+      /* separator depends on what was printed, not on the slot index, so
+       * an invalid entry 0 does not produce a leading blank line */
+      if (n_printed++)
+        s = format (s, "\n%U", format_white_space, indent);
+      s = format (s, "[%u] %U asel %u qsel %u qsel_enable %u av %u", i,
+                  format_ethernet_address, ra.hw_addr, ra.asel, ra.qsel,
+                  ra.qsel_enable, ra.av);
+    }
+
+  return s;
+}
diff --git a/src/plugins/dev_ige/ige.c b/src/plugins/dev_ige/ige.c
new file mode 100644 (file)
index 0000000..af9cb35
--- /dev/null
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/pci.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = { /* file-local log class; the
+  * log_* macros in ige.h expand to uses of ige_log.class */
+  .class_name = "ige",
+  .subclass_name = "init",
+};
+
+#define _(f, n, s, d)                                                         \
+  { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t ige_tx_node_counters[] = { foreach_ige_tx_node_counter }; /* one descriptor per foreach_ige_tx_node_counter entry */
+#undef _
+
+vnet_dev_node_t ige_rx_node = { /* rx node template passed to vnet_dev_port_add () by ige_init () */
+  .format_trace = format_ige_rx_trace,
+};
+
+vnet_dev_node_t ige_tx_node = { /* tx node template, with the error counters defined above */
+  .error_counters = ige_tx_node_counters,
+  .n_error_counters = ARRAY_LEN (ige_tx_node_counters),
+  .format_trace = format_ige_tx_trace,
+};
+
+static ige_dev_config_t config_by_type[] = { /* per-device-type settings,
+  * indexed by ige_dev_type_t; ige_init () copies the matching entry into
+  * ige_device_t.config */
+  [IGE_DEV_TYPE_I211] = { .phy_type = IGE_PHY_TYPE_I210_INTERNAL },
+  [IGE_DEV_TYPE_I225] = { .phy_type = IGE_PHY_TYPE_GPY211,
+                         .supports_2_5g = 1 },
+  [IGE_DEV_TYPE_I226] = { .phy_type = IGE_PHY_TYPE_GPY211,
+                         .supports_2_5g = 1 },
+};
+
+static struct /* table of supported PCI device ids (vendor 0x8086 is checked
+  * separately by ige_probe () and ige_init ()) */
+{
+  u16 device_id;
+  ige_dev_type_t type; /* index into config_by_type[] */
+  char *description;   /* string returned by ige_probe () on a match */
+} ige_dev_types[] = {
+
+#define _(id, t, desc)                                                        \
+  {                                                                           \
+    .device_id = (id), .type = IGE_DEV_TYPE_##t, .description = (desc)        \
+  }
+
+  _ (0x1539, I211, "Intel(R) Ethernet Controller I211"),
+  _ (0x15f2, I225, "Intel(R) Ethernet Controller I225-LM"),
+  _ (0x15f3, I225, "Intel(R) Ethernet Controller I225-V"),
+  _ (0x0d9f, I225, "Intel(R) Ethernet Controller I225-IT"),
+  _ (0x125b, I226, "Intel(R) Ethernet Controller I226-LM"),
+  _ (0x125c, I226, "Intel(R) Ethernet Controller I226-V"),
+  _ (0x125d, I226, "Intel(R) Ethernet Controller I226-IT"),
+#undef _
+};
+
+/* Driver probe callback. dev_info is interpreted as a
+ * vnet_dev_bus_pci_device_info_t. Returns a newly formatted description
+ * vector when the vendor id is 0x8086 and the device id is listed in
+ * ige_dev_types, otherwise 0. vm and bus_index are not used. */
+static u8 *
+ige_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+  vnet_dev_bus_pci_device_info_t *pci = dev_info;
+  u8 *desc = 0;
+
+  if (pci->vendor_id == 0x8086)
+    {
+      FOREACH_ARRAY_ELT (e, ige_dev_types)
+        {
+          if (e->device_id != pci->device_id)
+            continue;
+
+          desc = format (0, "%s", e->description);
+          break;
+        }
+    }
+
+  return desc;
+}
+
+static vnet_dev_rv_t
+ige_init (vlib_main_t *vm, vnet_dev_t *dev)
+{ /* Driver init callback: identify the adapter by PCI vendor/device id,
+   * map BAR0, mask interrupts, perform a function level reset, wait for
+   * STATUS.rst_done, initialise the PHY and register a single ethernet
+   * port (id 0). Returns VNET_DEV_OK or the first error encountered. */
+  ige_device_t *id = vnet_dev_get_data (dev);
+  vlib_pci_config_hdr_t pci_hdr;
+  vnet_dev_rv_t rv;
+  u32 match, mask, tmp;
+
+  rv = vnet_dev_pci_read_config_header (vm, dev, &pci_hdr);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  if (pci_hdr.vendor_id != 0x8086)
+    return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+  rv = VNET_DEV_ERR_UNSUPPORTED_DEVICE; /* stays set unless the id is found below */
+
+  FOREACH_ARRAY_ELT (dt, ige_dev_types)
+    if (dt->device_id == pci_hdr.device_id)
+      {
+       id->config = config_by_type[dt->type];
+       rv = VNET_DEV_OK;
+       break;
+      }
+
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  /* map BAR0 */
+  if (id->bar0 == 0) /* skipped when a mapping is already recorded */
+    {
+      rv = vnet_dev_pci_map_region (vm, dev, 0, &id->bar0);
+      if (rv != VNET_DEV_OK)
+       return rv;
+    }
+
+  /* disable interrupts */
+  ige_reg_wr (dev, IGE_REG_IMC, 0xffffffff);
+  ige_reg_rd (dev, IGE_REG_ICR, &tmp); /* value is discarded */
+
+  rv = vnet_dev_pci_function_level_reset (vm, dev);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  rv = vnet_dev_pci_bus_master_enable (vm, dev);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  mask = (ige_reg_status_t){ .rst_done = 1 }.as_u32; /* poll STATUS until rst_done is set */
+  match = mask;
+
+  if (ige_reg_poll (vm, dev, IGE_REG_STATUS, mask, match, 1e-5, 1e-1) == 0)
+    { /* NOTE(review): ige_reg_poll () is declared in ige.h as returning
+       * vnet_dev_rv_t, yet 0 is treated as "timed out" here - confirm its
+       * return convention against reg.c. */
+      log_err (dev, "reset timeout");
+      return VNET_DEV_ERR_TIMEOUT;
+    }
+
+  /* disable interrupts again */
+  ige_reg_wr (dev, IGE_REG_IMC, 0xffffffff);
+  ige_reg_rd (dev, IGE_REG_ICR, &tmp);
+
+  /* notify ME that driver is loaded */
+  ige_reg_ctrl_ext_t ctrl_ext;
+  ige_reg_rd (dev, IGE_REG_CTRL_EXT, &ctrl_ext.as_u32);
+  ctrl_ext.driver_loaded = 1;
+  ige_reg_wr (dev, IGE_REG_CTRL_EXT, ctrl_ext.as_u32);
+
+  rv = ige_phy_init (vm, dev); /* any failure is reported with the message below */
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "failed to read PHY ID");
+      return rv;
+    }
+
+  vnet_dev_port_add_args_t port = {
+    .port = {
+      .attr = {
+        .type = VNET_DEV_PORT_TYPE_ETHERNET,
+        .max_rx_queues = 4, /* same count as avail_rxq_bmp below */
+        .max_tx_queues = 4, /* same count as avail_txq_bmp below */
+        .max_supported_rx_frame_size = 9728,
+      },
+      .ops = {
+        .init = ige_port_init,
+        .start = ige_port_start,
+        .stop = ige_port_stop,
+        .format_status = format_ige_port_status,
+        .config_change = ige_port_cfg_change,
+        .config_change_validate = ige_port_cfg_change_validate,
+      },
+      .data_size = sizeof (ige_port_t),
+    },
+    .rx_node = &ige_rx_node,
+    .tx_node = &ige_tx_node,
+    .rx_queue = {
+      .config = {
+        .data_size = sizeof (ige_rxq_t),
+        .default_size = 512,
+        .size_is_power_of_two = 1,
+        .min_size = 512,
+        .max_size = 32768,
+      },
+      .ops = {
+        .alloc = ige_rx_queue_alloc,
+        .free = ige_rx_queue_free,
+      },
+    },
+    .tx_queue = {
+      .config = {
+        .data_size = sizeof (ige_txq_t),
+        .default_size = 512,
+        .size_is_power_of_two = 1,
+        .min_size = 512,
+        .max_size = 32768,
+      },
+      .ops = {
+        .alloc = ige_tx_queue_alloc,
+        .free = ige_tx_queue_free,
+      },
+    },
+  };
+
+  ige_reg_rd (dev, IGE_REG_RAL0, &tmp); /* MAC bytes 0-3 are copied from RAL0 */
+  clib_memcpy (&port.port.attr.hw_addr.eth_mac[0], &tmp, 4);
+  ige_reg_rd (dev, IGE_REG_RAH0, &tmp); /* MAC bytes 4-5 are copied from RAH0 */
+  clib_memcpy (&port.port.attr.hw_addr.eth_mac[4], &tmp, 2);
+  log_info (dev, "MAC address is %U", format_ethernet_address,
+           port.port.attr.hw_addr.eth_mac);
+
+  id->avail_rxq_bmp = pow2_mask (4); /* one bit per rx queue, all set */
+  id->avail_txq_bmp = pow2_mask (4); /* one bit per tx queue, all set */
+  return vnet_dev_port_add (vm, dev, 0, &port);
+}
+
+VNET_DEV_REGISTER_DRIVER (ige) = { /* registers the driver on the "pci" bus with its probe and init callbacks */
+  .name = "ige",
+  .bus = "pci",
+  .device_data_sz = sizeof (ige_device_t), /* per-device data returned by vnet_dev_get_data () */
+  .ops = {
+    .init = ige_init,
+    .probe = ige_probe,
+  },
+};
+
+VLIB_PLUGIN_REGISTER () = { /* plugin registration record */
+  .version = VPP_BUILD_VER,
+  .description = "dev_ige",
+};
diff --git a/src/plugins/dev_ige/ige.h b/src/plugins/dev_ige/ige.h
new file mode 100644 (file)
index 0000000..444fbbc
--- /dev/null
@@ -0,0 +1,301 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#pragma once
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+#include <dev_ige/ige_regs.h>
+
+typedef union /* 16-byte rx descriptor; two overlapping views of the same
+  * memory. Bit positions noted below assume LSB-first bit-field allocation. */
+{
+  struct /* buffer address view, written by ige_rxq_refill_no_wrap () */
+  {
+    u64 pkt_addr;
+    u64 hdr_addr; /* always written as 0 by ige_rxq_refill_no_wrap () */
+  };
+  struct /* status view, decoded by format_ige_rx_desc () */
+  {
+    u64 rss_type : 4; /* qword 0, bits 3:0; see format_ige_rss_type () */
+
+    /* packet type */
+    u64 ipv4 : 1;
+    u64 ipv4e : 1;
+    u64 ipv6 : 1;
+    u64 ipv6e : 1;
+    u64 tcp : 1;
+    u64 udp : 1;
+    u64 sctp : 1;
+    u64 nfs : 1;
+    u64 etqf : 3;
+    u64 l2pkt : 1;
+    u64 vpkt : 1;
+
+    u64 _reserved_17 : 2;
+    u64 hdr_len_hi : 2;	 /* combined by format_ige_rx_desc () as */
+    u64 hdr_len_lo : 10; /* (hdr_len_hi << 10) | hdr_len_lo */
+    u64 sph : 1;
+    u64 rss_hash : 32; /* qword 0, bits 63:32 */
+
+    /* ext status */
+    u64 dd : 1; /* qword 1, bit 0 */
+    u64 eop : 1;
+    u64 _rsv1 : 1;
+    u64 vp : 1;
+    u64 udpcs : 1;
+    u64 l4i : 1;
+    u64 ipcs : 1;
+    u64 pif : 1;
+    u64 _rsv2 : 1;
+    u64 vext : 1;
+    u64 udpv : 1;
+    u64 llint : 1;
+    u64 strip_crc : 1;
+    u64 smd_type : 2;
+    u64 tsip : 1;
+    u64 _rsv3 : 3;
+    u64 mc : 1;
+
+    /* ext error */
+    u64 _rsv4 : 3;
+    u64 hbo : 1;
+    u64 _rsv5 : 5;
+    u64 l4e : 1;
+    u64 ipe : 1;
+    u64 rxe : 1;
+
+    u64 pkt_len : 16;  /* qword 1, bits 47:32 */
+    u64 vlan_tag : 16; /* qword 1, bits 63:48 */
+  };
+} ige_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ige_rx_desc_t, 16);
+
+typedef union /* 16-byte tx descriptor; three overlapping views of the same
+  * memory. Bit positions noted below assume LSB-first bit-field allocation. */
+{
+  u64 qwords[2]; /* raw access to both 64-bit halves */
+  struct /* view decoded by format_ige_tx_desc () */
+  {
+    u64 addr;
+    u64 dtalen : 16; /* qword 1, bits 15:0 */
+    u64 ptp1 : 4;
+    u64 dtyp : 4;
+
+    u64 eop : 1; /* qword 1, bit 24 */
+    u64 ifcs : 1;
+    u64 _reserved_26 : 1;
+    u64 rs : 1;
+    u64 _reserved_28 : 1;
+    u64 dext : 1;
+    u64 vle : 1;
+    u64 tse : 1;
+
+    /* status */
+    u64 dd : 1; /* qword 1, bit 32 */
+    u64 ts_stat : 1;
+    u64 _reserved_35_36 : 2; /* NOTE(review): by position this is bits 35:34, not 36:35 as the name suggests */
+
+    u64 idx : 1; /* qword 1, bit 36 */
+    u64 ptp2 : 3;
+    u64 popts : 6;
+    u64 paylen : 18; /* qword 1, bits 63:46 */
+  };
+
+  /* writeback */
+  struct
+  {
+    u64 dma_timestamp;
+    u64 _reserved_64_95 : 32;
+    u64 sta : 4; /* same bits as dd, ts_stat and _reserved_35_36 above */
+    u64 _reserved_100_127 : 28;
+  };
+} ige_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ige_tx_desc_t, 16);
+
+typedef enum /* PHY variants; assigned per device type in config_by_type[] (ige.c) */
+{
+  IGE_PHY_TYPE_UNKNOWN = 0,
+  IGE_PHY_TYPE_I210_INTERNAL,
+  IGE_PHY_TYPE_GPY211,
+} __clib_packed ige_phy_type_t;
+
+typedef enum /* adapter families; used as the index into config_by_type[] (ige.c) */
+{
+  IGE_DEV_TYPE_I211,
+  IGE_DEV_TYPE_I225,
+  IGE_DEV_TYPE_I226,
+} __clib_packed ige_dev_type_t;
+
+typedef struct /* static per-device-type configuration */
+{
+  ige_phy_type_t phy_type;
+  u8 supports_2_5g : 1; /* lets format_ige_port_status () report 2500 Mbps */
+} ige_dev_config_t;
+
+typedef struct /* per-device driver data (see .device_data_sz in ige.c) */
+{
+  void *bar0;	    /* mapped BAR0; base address used by ige_reg_rd/ige_reg_wr */
+  u8 avail_rxq_bmp; /* set to pow2_mask (4) by ige_init () */
+  u8 avail_txq_bmp; /* set to pow2_mask (4) by ige_init () */
+  ige_phy_type_t phy_type; /* NOTE(review): config.phy_type also exists - confirm which one phy.c uses */
+  ige_dev_config_t config; /* copied from config_by_type[] by ige_init () */
+} ige_device_t;
+
+typedef struct /* per-port driver data (see .data_size in ige.c) */
+{
+  ige_reg_status_t last_status; /* status value reported by format_ige_port_status () */
+} ige_port_t;
+
+typedef struct /* per-rx-queue driver data (see rx_queue.config.data_size in ige.c) */
+{
+  u32 *buffer_indices;
+  ige_rx_desc_t *descs;
+  u16 head;
+  u16 tail;
+  u32 *reg_rdt; /* presumably points at the queue's RDT register - TODO confirm in queue.c */
+} ige_rxq_t;
+
+typedef struct /* per-tx-queue driver data (see tx_queue.config.data_size in ige.c) */
+{
+  u32 *buffer_indices;
+  ige_tx_desc_t *descs;
+  u16 head;
+  u16 tail;
+  u32 *reg_tdt; /* presumably points at the queue's TDT register - TODO confirm in queue.c */
+  u32 *wb;	/* NOTE(review): purpose not visible in this header - confirm in queue.c / tx_node.c */
+} ige_txq_t;
+
+typedef struct /* rx trace record, printed by format_ige_rx_trace () */
+{
+  ige_rx_desc_t desc;
+  u32 buffer_index;
+  u32 hw_if_index;
+  u16 queue_id;
+  u16 next_index;
+} ige_rx_trace_t;
+
+typedef struct /* tx trace record, printed by format_ige_tx_trace () */
+{
+  ige_tx_desc_t desc;
+  u32 buffer_index;
+  u32 hw_if_index;
+  u16 queue_id;
+} ige_tx_trace_t;
+
+/* counters.c */
+vnet_dev_rv_t ige_port_counters_init (vlib_main_t *, vnet_dev_port_t *);
+void ige_port_counter_poll (vlib_main_t *, vnet_dev_port_t *);
+
+/* format.c */
+format_function_t format_ige_reg_write;
+format_function_t format_ige_reg_read;
+format_function_t format_ige_reg_diff;
+format_function_t format_ige_port_status;
+format_function_t format_ige_rx_desc;
+format_function_t format_ige_rx_trace;
+format_function_t format_ige_tx_desc;
+format_function_t format_ige_tx_trace;
+format_function_t format_ige_receive_addr_table;
+
+/* phy.c */
+vnet_dev_rv_t ige_phy_init (vlib_main_t *, vnet_dev_t *);
+
+/* port.c */
+vnet_dev_rv_t ige_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ige_port_start (vlib_main_t *, vnet_dev_port_t *);
+void ige_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ige_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+                                           vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t ige_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+                                  vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t ige_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ige_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ige_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ige_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* Allocate up to n_refill buffers from buffer_pool_index into
+ * buffer_indices[] and program the matching entries of descs[]: pkt_addr
+ * gets the buffer's virtual address when use_va_dma is non-zero, otherwise
+ * its physical address; hdr_addr is cleared. Returns the number of buffers
+ * actually allocated. */
+static_always_inline u16
+ige_rxq_refill_no_wrap (vlib_main_t *vm, u32 *buffer_indices,
+                        ige_rx_desc_t *descs, u16 n_refill,
+                        u8 buffer_pool_index, int use_va_dma)
+{
+  u16 n_got = vlib_buffer_alloc_from_pool (vm, buffer_indices, n_refill,
+                                           buffer_pool_index);
+
+  for (u32 i = 0; i < n_got; i++)
+    {
+      vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+      ige_rx_desc_t *d = descs + i;
+
+      d->pkt_addr =
+        use_va_dma ? vlib_buffer_get_va (b) : vlib_buffer_get_pa (vm, b);
+      d->hdr_addr = 0;
+    }
+
+  return n_got;
+}
+
+/* reg.c */
+vnet_dev_rv_t ige_reg_poll (vlib_main_t *, vnet_dev_t *, u32, u32, u32, f64,
+                           f64);
+int ige_reg_sw_fw_sync_acquire (vlib_main_t *, vnet_dev_t *);
+void ige_reg_sw_fw_sync_release (vlib_main_t *, vnet_dev_t *);
+
+/* inlines */
+/* Read the 32-bit register at byte offset reg from the device's BAR0
+ * mapping (atomic load, acquire ordering) and store it in *val. */
+static_always_inline void
+ige_reg_rd (vnet_dev_t *dev, u32 reg, u32 *val)
+{
+  ige_device_t *id = vnet_dev_get_data (dev);
+  u32 *addr = (u32 *) ((u8 *) id->bar0 + reg);
+
+  *val = __atomic_load_n (addr, __ATOMIC_ACQUIRE);
+}
+
+/* Write val to the 32-bit register at byte offset reg in the device's BAR0
+ * mapping (atomic store, release ordering). */
+static_always_inline void
+ige_reg_wr (vnet_dev_t *dev, u32 reg, u32 val)
+{
+  ige_device_t *id = vnet_dev_get_data (dev);
+  u32 *addr = (u32 *) ((u8 *) id->bar0 + reg);
+
+  __atomic_store_n (addr, val, __ATOMIC_RELEASE);
+}
+
+#define log_debug(dev, f, ...)                                                \
+  vlib_log (VLIB_LOG_LEVEL_DEBUG, ige_log.class, "%U" f, format_vnet_dev_log, \
+           (dev), clib_string_skip_prefix (__func__, "ige_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_INFO, ige_log.class, "%U: " f,                     \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+#define log_notice(dev, f, ...)                                               \
+  vlib_log (VLIB_LOG_LEVEL_NOTICE, ige_log.class, "%U: " f,                   \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+#define log_warn(dev, f, ...)                                                 \
+  vlib_log (VLIB_LOG_LEVEL_WARNING, ige_log.class, "%U: " f,                  \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+#define log_err(dev, f, ...)                                                  \
+  vlib_log (VLIB_LOG_LEVEL_ERR, ige_log.class, "%U: " f,                      \
+           format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define foreach_ige_tx_node_counter                                           \
+  _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")                 \
+  _ (BUFFER_CHAIN_TOO_LONG, buffer_chain_too_long, ERROR,                     \
+     "buffer chain too long")
+
+typedef enum /* IGE_TX_NODE_CTR_<NAME> for each entry listed above, in the
+  * same order as ige_tx_node_counters[] in ige.c, which is generated from
+  * the same list */
+{
+#define _(f, n, s, d) IGE_TX_NODE_CTR_##f,
+  foreach_ige_tx_node_counter
+#undef _
+} ige_tx_node_counter_t;
diff --git a/src/plugins/dev_ige/ige_regs.h b/src/plugins/dev_ige/ige_regs.h
new file mode 100644 (file)
index 0000000..85dcc77
--- /dev/null
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#pragma once
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+
+/* Register field lists. Each list is a sequence of __ (width, name) entries
+ * in declaration order; the widths of every list add up to 32. Reserved
+ * fields are named _reserved<N>, where N is the bit position at which the
+ * field starts when fields are allocated from bit 0 upwards. */
+
+/* CTRL register (offset 0x00000). */
+#define ige_reg_ctrl_t_fields                                                 \
+  __ (1, full_duplex)                                                         \
+  __ (1, _reserved1)                                                          \
+  __ (1, gio_master_disable)                                                  \
+  __ (3, _reserved3)                                                          \
+  __ (1, set_link_up)                                                         \
+  __ (9, _reserved7)                                                          \
+  __ (1, sdp0_gpien)                                                          \
+  __ (1, sdp1_gpien)                                                          \
+  __ (1, sdp0_data)                                                           \
+  __ (1, sdp1_data)                                                           \
+  __ (1, adww3wuc)                                                            \
+  __ (1, sdp0_wde)                                                            \
+  __ (1, sdp0_iodir)                                                          \
+  __ (1, sdp1_iodir)                                                          \
+  __ (2, _reserved24)                                                         \
+  __ (1, port_sw_reset)                                                       \
+  __ (1, rx_flow_ctl_en)                                                      \
+  __ (1, tx_flow_ctl_en)                                                      \
+  __ (1, device_reset)                                                        \
+  __ (1, vlan_mode_enable)                                                    \
+  __ (1, phy_reset)
+
+/* STATUS register (offset 0x00008). The `speed` and `speed_2p5` fields are
+ * combined by the port status poll code to index its link speed table. */
+#define ige_reg_status_t_fields                                               \
+  __ (1, full_duplex)                                                         \
+  __ (1, link_up)                                                             \
+  __ (2, _reserved2)                                                          \
+  __ (1, tx_off)                                                              \
+  __ (1, _reserved5)                                                          \
+  __ (2, speed)                                                               \
+  __ (2, asdv)                                                                \
+  __ (1, phy_reset_asserted)                                                  \
+  __ (8, _reserved11)                                                         \
+  __ (1, gio_master_en_sts)                                                   \
+  __ (1, dev_rst_set)                                                         \
+  __ (1, rst_done)                                                            \
+  __ (1, speed_2p5)                                                           \
+  __ (7, _reserved23)                                                         \
+  __ (1, lpi_ignore)                                                          \
+  __ (1, _reserved31)
+
+/* CTRL_EXT register (offset 0x00018). Entries are __ (width, name) in
+ * declaration order and the widths add up to 32. Reserved fields are named
+ * after the bit position at which they start. */
+#define ige_reg_ctrl_ext_t_fields                                             \
+  __ (2, _reserved0)                                                          \
+  __ (1, sdp2_gpien)                                                          \
+  __ (1, sdp3_gpien)                                                          \
+  __ (2, _reserved4)                                                          \
+  __ (1, sdp2_data)                                                           \
+  __ (1, sdp3_data)                                                           \
+  __ (2, _reserved8)                                                          \
+  __ (1, sdp2_iodir)                                                          \
+  __ (1, sdp3_iodir)                                                          \
+  __ (1, _reserved12)                                                         \
+  __ (1, eeprom_block_rst)                                                    \
+  __ (2, _reserved14)                                                         \
+  __ (1, no_snoop_dis)                                                        \
+  __ (1, relaxed_ordering_dis)                                                \
+  __ (2, _reserved18)                                                         \
+  __ (1, phy_power_down_ena)                                                  \
+  __ (5, _reserved21)                                                         \
+  __ (1, ext_vlan_ena)                                                        \
+  __ (1, _reserved27)                                                         \
+  __ (1, driver_loaded)                                                       \
+  __ (3, _reserved29)
+
+/* MDIC register (offset 0x00020), used by the PHY read/write helpers:
+ * they fill in `regadd`, `opcode` and (for writes) `data`, then poll
+ * `ready`. Entries are __ (width, name); widths add up to 32. */
+#define ige_reg_mdic_t_fields                                                 \
+  __ (16, data)                                                               \
+  __ (5, regadd)                                                              \
+  __ (5, _reserved21)                                                         \
+  __ (2, opcode)                                                              \
+  __ (1, ready)                                                               \
+  __ (1, mid_ie)                                                              \
+  __ (1, mid_err)                                                             \
+  __ (1, _reserved31)
+
+/* RCTL register (offset 0x00100). Entries are __ (width, name) in
+ * declaration order and the widths add up to 32. Reserved fields are named
+ * after the bit position at which they start. */
+#define ige_reg_rctl_t_fields                                                 \
+  __ (1, _reserved0)                                                          \
+  __ (1, rx_enable)                                                           \
+  __ (1, store_bad_packets)                                                   \
+  __ (1, uc_promisc_ena)                                                      \
+  __ (1, mc_promisc_ena)                                                      \
+  __ (1, long_pkt_reception_ena)                                              \
+  __ (2, loopback_mode)                                                       \
+  __ (2, hash_select)                                                         \
+  __ (2, _reserved10)                                                         \
+  __ (2, mc_uc_tbl_off)                                                       \
+  __ (1, _reserved14)                                                         \
+  __ (1, bcast_accept_mode)                                                   \
+  __ (2, rx_buf_sz)                                                           \
+  __ (1, vlan_filter_ena)                                                     \
+  __ (1, cannonical_form_ind_ena)                                             \
+  __ (1, cannonical_form_ind_bit_val)                                         \
+  __ (1, pad_small_rx_pkts)                                                   \
+  __ (1, discard_pause_frames)                                                \
+  __ (1, pass_mac_ctrl_frames)                                                \
+  __ (2, _reserved24)                                                         \
+  __ (1, strip_eth_crc)                                                       \
+  __ (5, _reserved27)
+
+/* TCTL register (offset 0x00400). Entries are __ (width, name) in
+ * declaration order and the widths add up to 32. Reserved fields carry the
+ * same leading underscore as in every other register list. */
+#define ige_reg_tctl_t_fields                                                 \
+  __ (1, _reserved0)                                                          \
+  __ (1, tx_enable)                                                           \
+  __ (1, _reserved2)                                                          \
+  __ (1, pad_short_pkts)                                                      \
+  __ (8, collision_threshold)                                                 \
+  __ (10, backoff_slot_time)                                                  \
+  __ (1, sw_xoff_tx)                                                          \
+  __ (1, _reserved23)                                                         \
+  __ (1, retransmit_on_late_colision)                                         \
+  __ (7, _reserved25)
+
+/* TXDCTL register (queue 0 at offset 0x0e028). Entries are
+ * __ (width, name); widths add up to 32. */
+#define ige_reg_txdctl_t_fields                                               \
+  __ (5, pthresh)                                                             \
+  __ (3, _reserved5)                                                          \
+  __ (5, hthresh)                                                             \
+  __ (3, _reserved13)                                                         \
+  __ (5, wthresh)                                                             \
+  __ (4, _reserved21)                                                         \
+  __ (1, enable)                                                              \
+  __ (1, sw_flush)                                                            \
+  __ (1, priority)                                                            \
+  __ (4, hwbthresh)
+
+/* PHPM register (offset 0x00e14). */
+#define ige_reg_phpm_t_fields                                                 \
+  __ (1, _reserved0)                                                          \
+  __ (1, restart_autoneg)                                                     \
+  __ (1, _reserved2)                                                          \
+  __ (1, dis_1000_in_non_d0a)                                                 \
+  __ (1, link_energy_detect)                                                  \
+  __ (1, go_link_disc)                                                        \
+  __ (1, disable_1000)                                                        \
+  __ (1, spd_b2b_en)                                                          \
+  __ (1, rst_compl)                                                           \
+  __ (1, dis_100_in_non_d0a)                                                  \
+  __ (1, ulp_req)                                                             \
+  __ (1, disable_2500)                                                        \
+  __ (1, dis_2500_in_non_d0a)                                                 \
+  __ (1, ulp_trig)                                                            \
+  __ (2, ulp_delay)                                                           \
+  __ (1, link_enery_en)                                                       \
+  __ (1, dev_off_en)                                                          \
+  __ (1, dev_off_state)                                                       \
+  __ (1, ulp_en)                                                              \
+  __ (12, _reserved20)
+
+/* MANC register (offset 0x05820). */
+#define ige_reg_manc_t_fields                                                 \
+  __ (1, flow_ctrl_discard)                                                   \
+  __ (1, ncsi_discard)                                                        \
+  __ (12, _reserved2)                                                         \
+  __ (1, fw_reset)                                                            \
+  __ (1, tco_isolate)                                                         \
+  __ (1, tco_reset)                                                           \
+  __ (1, rcv_tco_en)                                                          \
+  __ (1, keep_phy_link_up)                                                    \
+  __ (1, rcv_all)                                                             \
+  __ (1, inhibit_ulp)                                                         \
+  __ (2, _reserved21)                                                         \
+  __ (1, en_xsum_filter)                                                      \
+  __ (1, en_ipv4_filter)                                                      \
+  __ (1, fixed_net_type)                                                      \
+  __ (1, net_type)                                                            \
+  __ (1, ipv6_adv_only)                                                       \
+  __ (1, en_bmc2os)                                                           \
+  __ (1, en_bmc2net)                                                          \
+  __ (1, mproxye)                                                             \
+  __ (1, mproxya)
+
+/* SWSM register (offset 0x05b50). */
+#define ige_reg_swsm_t_fields                                                 \
+  __ (1, smbi)                                                                \
+  __ (1, swesmbi)                                                             \
+  __ (30, _reserved2)
+
+/* FWSM register (offset 0x05b54). */
+#define ige_reg_fwsm_t_fields                                                 \
+  __ (1, eep_fw_semaphore)                                                    \
+  __ (3, fw_mode)                                                             \
+  __ (2, _reserved4)                                                          \
+  __ (1, eep_reload_ind)                                                      \
+  __ (8, _reserved7)                                                          \
+  __ (1, fw_val_bit)                                                          \
+  __ (3, reset_ctr)                                                           \
+  __ (6, ext_err_ind)                                                         \
+  __ (1, pcie_config_err_ind)                                                 \
+  __ (5, _reserved26)                                                         \
+  __ (1, factory_mac_addr_restored)
+
+/* SW_FW_SYNC register (offset 0x05b5c). The PHY acquire/release helpers
+ * test `fw_phy_sm` and set/clear `sw_phy_sm`. */
+#define ige_reg_sw_fw_sync_t_fields                                           \
+  __ (1, sw_flash_sm)                                                         \
+  __ (1, sw_phy_sm)                                                           \
+  __ (1, sw_i2c_sm)                                                           \
+  __ (1, sw_mac_csr_sm)                                                       \
+  __ (3, _reserved4)                                                          \
+  __ (1, sw_svr_sm)                                                           \
+  __ (1, sw_mb_sm)                                                            \
+  __ (1, _reserved9)                                                          \
+  __ (1, sw_mng_sm)                                                           \
+  __ (5, _reserved11)                                                         \
+  __ (1, fw_flash_sm)                                                         \
+  __ (1, fw_phy_sm)                                                           \
+  __ (1, fw_i2c_sm)                                                           \
+  __ (1, fw_mac_csr_sm)                                                       \
+  __ (3, _reserved20)                                                         \
+  __ (1, fw_svr_sm)                                                           \
+  __ (8, _reserved24)
+
+/* SRRCTL register (queue 0 at offset 0x0c00c). */
+#define ige_reg_srrctl_t_fields                                               \
+  __ (7, bsizepacket)                                                         \
+  __ (1, _reserved7)                                                          \
+  __ (6, bsizeheader)                                                         \
+  __ (2, timer1_sel)                                                          \
+  __ (1, _reserved16)                                                         \
+  __ (2, timer0_sel)                                                          \
+  __ (1, use_domain)                                                          \
+  __ (5, rdmts)                                                               \
+  __ (3, desc_type)                                                           \
+  __ (2, _reserved28)                                                         \
+  __ (1, timestamp)                                                           \
+  __ (1, drop_en)
+
+/* RXDCTL register (queue 0 at offset 0x0c028). */
+#define ige_reg_rxdctl_t_fields                                               \
+  __ (5, pthresh)                                                             \
+  __ (3, _reserved5)                                                          \
+  __ (5, hthresh)                                                             \
+  __ (3, _reserved13)                                                         \
+  __ (5, wthresh)                                                             \
+  __ (4, _reserved21)                                                         \
+  __ (1, enable)                                                              \
+  __ (1, swflush)                                                             \
+  __ (5, _reserved27)
+
+/* TXCTL register (queue 0 at offset 0x0e014). */
+#define ige_reg_txctl_t_fields                                                \
+  __ (1, tx_desc_fetch_tph_en)                                                \
+  __ (1, tx_desc_wb_tph_en)                                                   \
+  __ (1, _reserved2)                                                          \
+  __ (1, tx_packet_tph_en)                                                    \
+  __ (1, _reserved4)                                                          \
+  __ (1, tx_desc_dca_en)                                                      \
+  __ (2, _reserved6)                                                          \
+  __ (1, tx_desc_read_no_snoop_en)                                            \
+  __ (1, tx_desc_read_relax_order_en)                                         \
+  __ (1, tx_desc_wb_no_snoop_en)                                              \
+  __ (1, tx_desc_wb_relax_order_en)                                           \
+  __ (1, tx_data_no_snoop_en)                                                 \
+  __ (1, tx_data_relax_order_en)                                              \
+  __ (18, _reserved14)
+
+/* EEC register (offset 0x12010). Entries are __ (width, name) in
+ * declaration order and the widths add up to 32. Reserved fields are named
+ * after the bit position at which they start. */
+#define ige_reg_eec_t_fields                                                  \
+  __ (6, _reserved0)                                                          \
+  __ (1, flash_in_use)                                                        \
+  __ (1, _reserved7)                                                          \
+  __ (1, ee_pres)                                                             \
+  __ (1, auto_rd)                                                             \
+  __ (1, _reserved10)                                                         \
+  __ (4, ee_size)                                                             \
+  __ (4, pci_ana_done)                                                        \
+  __ (1, flash_detected)                                                      \
+  __ (2, _reserved20)                                                         \
+  __ (1, shadow_modified)                                                     \
+  __ (1, flupd)                                                               \
+  __ (1, _reserved24)                                                         \
+  __ (1, sec1val)                                                             \
+  __ (1, fludone)                                                             \
+  __ (5, _reserved27)
+
+/* EEMNGCTL register (offset 0x12030). Entries are __ (width, name) in
+ * declaration order and the widths add up to 32. Reserved fields carry the
+ * same leading underscore as in every other register list. */
+#define ige_reg_eemngctl_t_fields                                             \
+  __ (11, addr)                                                               \
+  __ (4, _reserved11)                                                         \
+  __ (1, cmd_valid)                                                           \
+  __ (1, write)                                                               \
+  __ (1, eebusy)                                                              \
+  __ (1, cfg_done)                                                            \
+  __ (12, _reserved19)                                                        \
+  __ (1, done)
+
+/* Define register type `n` as a union of an anonymous bit-field struct
+ * (expanded from the matching n##_fields list) and a raw `as_u32` view,
+ * and assert at compile time that the type is exactly 4 bytes. The `__`
+ * macro must be defined by the user before invoking this. */
+#define IGE_REG_STRUCT(n)                                                     \
+  typedef union                                                               \
+  {                                                                           \
+    struct                                                                    \
+    {                                                                         \
+      n##_fields;                                                             \
+    };                                                                        \
+    u32 as_u32;                                                               \
+  } n;                                                                        \
+  STATIC_ASSERT_SIZEOF (n, 4);
+
+/* Instantiate one register type per field list; each __ (width, name) entry
+ * becomes a `u32 name : width` bit-field. */
+#define __(n, f) u32 f : n;
+IGE_REG_STRUCT (ige_reg_status_t);
+IGE_REG_STRUCT (ige_reg_ctrl_t);
+IGE_REG_STRUCT (ige_reg_ctrl_ext_t);
+IGE_REG_STRUCT (ige_reg_mdic_t);
+IGE_REG_STRUCT (ige_reg_rctl_t);
+IGE_REG_STRUCT (ige_reg_tctl_t);
+IGE_REG_STRUCT (ige_reg_txdctl_t);
+IGE_REG_STRUCT (ige_reg_txctl_t);
+IGE_REG_STRUCT (ige_reg_phpm_t);
+IGE_REG_STRUCT (ige_reg_manc_t);
+IGE_REG_STRUCT (ige_reg_swsm_t);
+IGE_REG_STRUCT (ige_reg_fwsm_t);
+IGE_REG_STRUCT (ige_reg_sw_fw_sync_t);
+IGE_REG_STRUCT (ige_reg_srrctl_t);
+IGE_REG_STRUCT (ige_reg_rxdctl_t);
+IGE_REG_STRUCT (ige_reg_eec_t);
+IGE_REG_STRUCT (ige_reg_eemngctl_t);
+#undef __
+
+/* X-macro list of device registers: _ (byte offset, NAME, field list).
+ * The third argument is empty for registers that have no field list.
+ * Names ending in 0 are the first instance of a register that repeats
+ * (see the IGE_REG_*(n) macros). The TXDCTL0 entry (0x0e028) is listed
+ * after TDWBAH0 (0x0e03c), i.e. out of ascending-offset order. */
+#define foreach_ige_reg                                                       \
+  _ (0x00000, CTRL, ige_reg_ctrl_t_fields)                                    \
+  _ (0x00008, STATUS, ige_reg_status_t_fields)                                \
+  _ (0x00018, CTRL_EXT, ige_reg_ctrl_ext_t_fields)                            \
+  _ (0x00020, MDIC, ige_reg_mdic_t_fields)                                    \
+  _ (0x00100, RCTL, ige_reg_rctl_t_fields)                                    \
+  _ (0x00400, TCTL, ige_reg_tctl_t_fields)                                    \
+  _ (0x00404, TCTL_EXT, )                                                     \
+  _ (0x00e14, PHPM, ige_reg_phpm_t_fields)                                    \
+  _ (0x01500, ICR, )                                                          \
+  _ (0x0150c, IMC, )                                                          \
+  _ (0x05004, RLPML, )                                                        \
+  _ (0x05400, RAL0, )                                                         \
+  _ (0x05404, RAH0, )                                                         \
+  _ (0x05820, MANC, ige_reg_manc_t_fields)                                    \
+  _ (0x05b50, SWSM, ige_reg_swsm_t_fields)                                    \
+  _ (0x05b54, FWSM, ige_reg_fwsm_t_fields)                                    \
+  _ (0x05b5c, SW_FW_SYNC, ige_reg_sw_fw_sync_t_fields)                        \
+  _ (0x0c000, RDBAL0, )                                                       \
+  _ (0x0c004, RDBAH0, )                                                       \
+  _ (0x0c008, RDLEN0, )                                                       \
+  _ (0x0c00c, SRRCTL0, ige_reg_srrctl_t_fields)                               \
+  _ (0x0c010, RDH0, )                                                         \
+  _ (0x0c018, RDT0, )                                                         \
+  _ (0x0c028, RXDCTL0, ige_reg_rxdctl_t_fields)                               \
+  _ (0x0e000, TDBAL0, )                                                       \
+  _ (0x0e004, TDBAH0, )                                                       \
+  _ (0x0e008, TDLEN0, )                                                       \
+  _ (0x0e010, TDH0, )                                                         \
+  _ (0x0e014, TXCTL0, ige_reg_txctl_t_fields)                                 \
+  _ (0x0e018, TDT0, )                                                         \
+  _ (0x0e038, TDWBAL0, )                                                      \
+  _ (0x0e03c, TDWBAH0, )                                                      \
+  _ (0x0e028, TXDCTL0, ige_reg_txdctl_t_fields)                               \
+  _ (0x12010, EEC, ige_reg_eec_t_fields)                                      \
+  _ (0x12030, EEMNGCTL, ige_reg_eemngctl_t_fields)
+
+/* Offset of instance `n` of a repeating register: the RX/TX queue registers
+ * are computed with a stride of 0x40 bytes from the instance-0 offset, the
+ * receive address registers (RAL/RAH) with a stride of 0x08 bytes. */
+#define IGE_REG_RDBAL(n)  (IGE_REG_RDBAL0 + (n) *0x40)
+#define IGE_REG_RDBAH(n)  (IGE_REG_RDBAH0 + (n) *0x40)
+#define IGE_REG_RDLEN(n)  (IGE_REG_RDLEN0 + (n) *0x40)
+#define IGE_REG_SRRCTL(n) (IGE_REG_SRRCTL0 + (n) *0x40)
+#define IGE_REG_RDH(n)   (IGE_REG_RDH0 + (n) *0x40)
+#define IGE_REG_RDT(n)   (IGE_REG_RDT0 + (n) *0x40)
+#define IGE_REG_RXDCTL(n) (IGE_REG_RXDCTL0 + (n) *0x40)
+#define IGE_REG_TDBAL(n)  (IGE_REG_TDBAL0 + (n) *0x40)
+#define IGE_REG_TDBAH(n)  (IGE_REG_TDBAH0 + (n) *0x40)
+#define IGE_REG_TDLEN(n)  (IGE_REG_TDLEN0 + (n) *0x40)
+#define IGE_REG_TDH(n)   (IGE_REG_TDH0 + (n) *0x40)
+#define IGE_REG_TDT(n)   (IGE_REG_TDT0 + (n) *0x40)
+#define IGE_REG_TDWBAL(n) (IGE_REG_TDWBAL0 + (n) *0x40)
+#define IGE_REG_TDWBAH(n) (IGE_REG_TDWBAH0 + (n) *0x40)
+#define IGE_REG_TXDCTL(n) (IGE_REG_TXDCTL0 + (n) *0x40)
+#define IGE_REG_TXCTL(n)  (IGE_REG_TXCTL0 + (n) *0x40)
+#define IGE_REG_RAL(n)   (IGE_REG_RAL0 + (n) *0x08)
+#define IGE_REG_RAH(n)   (IGE_REG_RAH0 + (n) *0x08)
+
+/* Flag value for the TDWBAL register; judging by the name it enables TX
+ * head write-back - TODO confirm against the datasheet. */
+#define IGE_TDWBAL_HEAD_WB_ENABLE 0x1
+
+/* Register offsets as enumerators: IGE_REG_<NAME> equals the byte offset
+ * given for NAME in foreach_ige_reg. */
+typedef enum
+{
+#define _(o, n, f) IGE_REG_##n = (o),
+  foreach_ige_reg
+#undef _
+} ige_reg_t;
+
+/* 8-byte receive address entry with two overlapping views: the raw
+ * `ral`/`rah` 32-bit words, and a decoded view made of a 6-byte hardware
+ * address followed by 16 bits of flag fields. */
+typedef union
+{
+  struct
+  {
+    u32 ral;
+    u32 rah;
+  };
+  struct
+  {
+    u8 hw_addr[6];
+    u16 asel : 2;
+    u16 qsel : 2;
+    u16 _reserved20 : 8;
+    u16 qsel_enable : 1;
+    u16 _reserved30 : 2;
+    u16 av : 1;
+  };
+} ige_receive_addr_t;
+
+STATIC_ASSERT_SIZEOF (ige_receive_addr_t, 8);
diff --git a/src/plugins/dev_ige/phy.c b/src/plugins/dev_ige/phy.c
new file mode 100644 (file)
index 0000000..fc0e5d0
--- /dev/null
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* File-local log class ("ige" / "phy") used by the log_*() macros in this
+ * file. */
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = {
+  .class_name = "ige",
+  .subclass_name = "phy",
+};
+
+/* X-macro list of the PHY registers this file names:
+ * _ (register address, NAME). */
+#define foreach_ige_phy_reg                                                   \
+  _ (0x00, CTRL)                                                              \
+  _ (0x01, STAT)                                                              \
+  _ (0x02, PHYID1)                                                            \
+  _ (0x03, PHYID2)                                                            \
+  _ (0x04, AN_ADV)                                                            \
+  _ (0x09, GCTRL)                                                             \
+  _ (0x0a, GSTAT)                                                             \
+  _ (0x0d, MMDCTRL)                                                           \
+  _ (0x0e, MMDDATA)                                                           \
+  _ (0x0f, XSTAT)
+
+/* IGE_PHY_REG_<NAME> equals the register address from the list above. */
+typedef enum
+{
+#define _(n, v) IGE_PHY_REG_##v = (n),
+  foreach_ige_phy_reg
+#undef _
+} ige_phy_reg_t;
+
+/* Register names indexed by register address, used for debug logging.
+ * Addresses that are not in the list are left as null pointers, which the
+ * read/write helpers check for before printing a name. */
+static char *phy_reg_names[] = {
+#define _(n, v) [n] = #v,
+  foreach_ige_phy_reg
+#undef _
+};
+
+/* Take software ownership of the PHY.
+ *
+ * Makes up to 5 attempts. Each attempt takes the SW_FW_SYNC lock via
+ * ige_reg_sw_fw_sync_acquire () and, if the firmware PHY flag (fw_phy_sm)
+ * is clear, sets the software PHY flag (sw_phy_sm) and writes the register
+ * back. The SW_FW_SYNC lock is released again after every attempt that
+ * obtained it. Between attempts the process is suspended for 1e-4 seconds.
+ *
+ * Returns VNET_DEV_OK on success, VNET_DEV_ERR_TIMEOUT when all attempts
+ * fail. */
+static vnet_dev_rv_t
+ige_phy_acquire (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ige_reg_sw_fw_sync_t sw_fw_sync;
+  int n_tries = 5;
+
+  log_debug (dev, "phy_acquire:");
+
+  while (n_tries-- > 0)
+    {
+      if (ige_reg_sw_fw_sync_acquire (vm, dev))
+       {
+         ige_reg_rd (dev, IGE_REG_SW_FW_SYNC, &sw_fw_sync.as_u32);
+         /* the format string consumes one argument: the register value */
+         log_debug (dev, "phy_acquire: sw_fw_sync 0x%04x",
+                    sw_fw_sync.as_u32);
+
+         if (sw_fw_sync.fw_phy_sm == 0)
+           {
+             sw_fw_sync.sw_phy_sm = 1;
+             ige_reg_wr (dev, IGE_REG_SW_FW_SYNC, sw_fw_sync.as_u32);
+             ige_reg_sw_fw_sync_release (vm, dev);
+             return VNET_DEV_OK;
+           }
+
+         ige_reg_sw_fw_sync_release (vm, dev);
+       }
+      vlib_process_suspend (vm, 1e-4);
+    }
+
+  log_err (dev, "failed to acquire PHY");
+  return VNET_DEV_ERR_TIMEOUT;
+}
+
+/* Give up software ownership of the PHY.
+ *
+ * Takes the SW_FW_SYNC lock, clears the software PHY flag (sw_phy_sm) with
+ * a read-modify-write so that all other bits of the register are preserved,
+ * then releases the lock.
+ *
+ * Returns VNET_DEV_OK on success, VNET_DEV_ERR_TIMEOUT if the SW_FW_SYNC
+ * lock could not be obtained (the register is left untouched then). */
+static vnet_dev_rv_t
+ige_phy_release (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ige_reg_sw_fw_sync_t sw_fw_sync;
+
+  log_debug (dev, "phy_release:");
+
+  /* release phy */
+  if (ige_reg_sw_fw_sync_acquire (vm, dev) == 0)
+    {
+      log_err (dev, "sw_fw_sync ownership timeout");
+      return VNET_DEV_ERR_TIMEOUT;
+    }
+
+  /* fetch the current value first; writing back an uninitialised local
+   * would clobber every other ownership bit with garbage */
+  ige_reg_rd (dev, IGE_REG_SW_FW_SYNC, &sw_fw_sync.as_u32);
+  sw_fw_sync.sw_phy_sm = 0;
+  ige_reg_wr (dev, IGE_REG_SW_FW_SYNC, sw_fw_sync.as_u32);
+  ige_reg_sw_fw_sync_release (vm, dev);
+
+  return VNET_DEV_OK;
+}
+
+/* Read PHY register `addr` through the MDIC register and store the 16-bit
+ * result in `*data`.
+ *
+ * The request is written with opcode 2, the process is suspended for 5e-5
+ * seconds, and MDIC is then polled (up to 10 more times, 2e-5 seconds
+ * apart) until its `ready` bit is set. A warning is logged when the whole
+ * exchange took longer than 1e-4 seconds.
+ *
+ * Returns 0 on success, VNET_DEV_ERR_TIMEOUT if `ready` never became set;
+ * `*data` is not written in that case. */
+static vnet_dev_rv_t
+ige_phy_read (vlib_main_t *vm, vnet_dev_t *dev, u16 addr, u16 *data)
+{
+  ige_reg_mdic_t mdic = { .regadd = addr, .opcode = 2 };
+  f64 elapsed, start = vlib_time_now (vm);
+  int polls_left;
+
+  /* issue the request and give the hardware a head start */
+  ige_reg_wr (dev, IGE_REG_MDIC, mdic.as_u32);
+  vlib_process_suspend (vm, 5e-5);
+  ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32);
+
+  for (polls_left = 10; polls_left > 0 && mdic.ready == 0; polls_left--)
+    {
+      vlib_process_suspend (vm, 2e-5);
+      ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32);
+    }
+
+  elapsed = vlib_time_now (vm) - start;
+  if (elapsed > 1e-4)
+    log_warn (dev, "phy_read: register read took %.06f sec", elapsed);
+
+  if (mdic.ready == 0)
+    {
+      log_err (dev, "phy read timeout");
+      return VNET_DEV_ERR_TIMEOUT;
+    }
+
+  /* print the symbolic register name when one is known */
+  if (addr >= ARRAY_LEN (phy_reg_names) || phy_reg_names[addr] == 0)
+    log_debug (dev, "addr 0x%02x data 0x%04x", addr, mdic.data);
+  else
+    log_debug (dev, "reg %s data 0x%04x", phy_reg_names[addr], mdic.data);
+
+  *data = mdic.data;
+  return 0;
+}
+
+/* Write the 16-bit value `data` to PHY register `addr` through the MDIC
+ * register.
+ *
+ * The request is written with opcode 1, the process is suspended for 5e-5
+ * seconds, and MDIC is then polled (up to 10 more times, 2e-5 seconds
+ * apart) until its `ready` bit is set. A warning is logged when the whole
+ * exchange took longer than 1e-4 seconds.
+ *
+ * Returns 0 on success, VNET_DEV_ERR_TIMEOUT if `ready` never became set. */
+static vnet_dev_rv_t
+ige_phy_write (vlib_main_t *vm, vnet_dev_t *dev, u16 addr, u16 data)
+{
+  ige_reg_mdic_t mdic = { .regadd = addr, .opcode = 1, .data = data };
+  f64 elapsed, start = vlib_time_now (vm);
+  int polls_left;
+
+  /* issue the request and give the hardware a head start */
+  ige_reg_wr (dev, IGE_REG_MDIC, mdic.as_u32);
+  vlib_process_suspend (vm, 5e-5);
+  ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32);
+
+  for (polls_left = 10; polls_left > 0 && mdic.ready == 0; polls_left--)
+    {
+      vlib_process_suspend (vm, 2e-5);
+      ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32);
+    }
+
+  elapsed = vlib_time_now (vm) - start;
+  if (elapsed > 1e-4)
+    log_warn (dev, "phy_write: register write took %.06f sec", elapsed);
+
+  if (mdic.ready == 0)
+    {
+      log_err (dev, "phy write timeout");
+      return VNET_DEV_ERR_TIMEOUT;
+    }
+
+  /* the logged value is the data field as read back from MDIC */
+  if (addr >= ARRAY_LEN (phy_reg_names) || phy_reg_names[addr] == 0)
+    log_debug (dev, "addr 0x%02x data 0x%04x", addr, mdic.data);
+  else
+    log_debug (dev, "reg %s data 0x%04x", phy_reg_names[addr], mdic.data);
+
+  return 0;
+}
+
+/* X-macro list of recognised PHYs:
+ * _ (32-bit PHY id, IGE_PHY_TYPE_ suffix, "human readable name"). */
+#define foreach_ige_phy_type                                                  \
+  _ (0x67c9dc00, GPY211, "Foxville LM B.1")                                   \
+  _ (0x67c9dc80, GPY211, "Foxville LM B.2")                                   \
+  _ (0x67c9dcc0, GPY211, "Foxville LM B.3 / Foxville Dock")                   \
+  _ (0x67c9dc02, GPY211, "Foxville V B.1")                                    \
+  _ (0x67c9dc82, GPY211, "Foxville V B.2")                                    \
+  _ (0x67c9dcc2, GPY211, "Foxville V B.3")                                    \
+  _ (0x67c9dc83, GPY211, "Foxville IT B.2")                                   \
+  _ (0x67c9dcc3, GPY211, "Foxville IT B.3")                                   \
+  _ (0x67c9dc18, GPY211, "FoxvilleC LM / Dock")                               \
+  _ (0x67c9dc58, GPY211, "FoxvilleC V")                                       \
+  _ (0x67c9dcd8, GPY211, "FoxvilleC IT")
+
+/* Lookup table built from the list above; ige_phy_init () scans it for an
+ * exact phy_id match. */
+static struct
+{
+  u32 phy_id;
+  ige_phy_type_t type;
+  char *name;
+} phy_types[] = {
+#define _(i, t, s)                                                            \
+  {                                                                           \
+    .phy_id = i,                                                              \
+    .type = IGE_PHY_TYPE_##t,                                                 \
+    .name = s,                                                                \
+  },
+  foreach_ige_phy_type
+#undef _
+};
+
+/* Write `data` to register `addr` of MMD device `dad`, using indirect
+ * access through the MMDCTRL / MMDDATA PHY registers.
+ *
+ * The following five PHY register writes are issued in order:
+ *   MMDCTRL <- dad            (select device)
+ *   MMDDATA <- addr           (register address within the device)
+ *   MMDCTRL <- 0x4000 | dad   (switch to data access for the same device)
+ *   MMDDATA <- data           (the value itself)
+ *   MMDCTRL <- 0
+ * NOTE(review): the step descriptions follow the usual Clause 22 to
+ * Clause 45 indirect access scheme - confirm against the PHY datasheet.
+ *
+ * The caller is expected to hold PHY ownership (ige_phy_init () calls this
+ * between ige_phy_acquire () and ige_phy_release ()).
+ *
+ * Returns VNET_DEV_OK, or the error of the first PHY write that failed; no
+ * further writes are attempted after a failure. */
+vnet_dev_rv_t
+ige_phy_mmd_write (vlib_main_t *vm, vnet_dev_t *dev, u8 dad, u16 addr,
+                  u16 data)
+{
+  vnet_dev_rv_t rv;
+  struct
+  {
+    u16 reg;
+    u16 val;
+  } seq[] = {
+    { IGE_PHY_REG_MMDCTRL, dad },
+    { IGE_PHY_REG_MMDDATA, addr },
+    { IGE_PHY_REG_MMDCTRL, 0x4000 | dad },
+    { IGE_PHY_REG_MMDDATA, data },
+    { IGE_PHY_REG_MMDCTRL, 0 },
+  };
+
+  FOREACH_ARRAY_ELT (e, seq)
+    {
+      rv = ige_phy_write (vm, dev, e->reg, e->val);
+      if (rv != VNET_DEV_OK)
+       return rv;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* 32-bit PHY identifier with three overlapping views:
+ *  - the two 16-bit halves, filled by ige_phy_init () from the PHYID1 and
+ *    PHYID2 PHY registers (phy_id2 is declared first),
+ *  - the combined 32-bit `phy_id`, which is compared against the PHY type
+ *    table,
+ *  - revision (4 bits) / model (6 bits) / oui (22 bits) bit-fields, used
+ *    for logging. */
+typedef struct
+{
+  union
+  {
+    struct
+    {
+      u16 phy_id2;
+      u16 phy_id1;
+    };
+    u32 phy_id;
+    struct
+    {
+      u32 revision : 4;
+      u32 model : 6;
+      u32 oui : 22;
+    };
+  };
+} ige_phy_id_t;
+
+/* Identify the PHY and program what it advertises.
+ *
+ * Steps, all performed while holding PHY ownership:
+ *  1. read PHYID1 / PHYID2 and, unless the PHY type is already set in the
+ *     device config, look the combined id up in phy_types[];
+ *  2. fail with VNET_DEV_ERR_UNSUPPORTED_DEVICE if the type is still
+ *     unknown;
+ *  3. set bit 0x200 in GCTRL ("1000BASE-T Full-Duplex");
+ *  4. for GPY211, write 0xa2 to MMD device 7 register 0x20.
+ *
+ * PHY ownership is always released before returning. The return value is
+ * the first error encountered; a release failure is reported only when
+ * everything before it succeeded. If ownership cannot be acquired the
+ * function returns that error without touching the PHY. */
+vnet_dev_rv_t
+ige_phy_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ige_device_t *id = vnet_dev_get_data (dev);
+  vnet_dev_rv_t rv, release_rv;
+  u16 reg;
+  ige_phy_id_t phyid;
+
+  if ((rv = ige_phy_acquire (vm, dev)) != VNET_DEV_OK)
+    return rv;
+
+  if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_PHYID1, &phyid.phy_id1)) !=
+      VNET_DEV_OK)
+    goto done;
+
+  if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_PHYID2, &phyid.phy_id2)) !=
+      VNET_DEV_OK)
+    goto done;
+
+  if (id->config.phy_type == IGE_PHY_TYPE_UNKNOWN)
+    {
+      FOREACH_ARRAY_ELT (e, phy_types)
+       if (e->phy_id == phyid.phy_id)
+         {
+           log_debug (dev, "PHY is '%s' (oui 0x%x model 0x%x revision 0x%x)",
+                      e->name, phyid.oui, phyid.model, phyid.revision);
+           id->config.phy_type = e->type;
+           break;
+         }
+    }
+
+  if (id->config.phy_type == IGE_PHY_TYPE_UNKNOWN)
+    {
+      log_err (dev, "Unsupported phy 0x%08x", phyid.phy_id);
+      rv = VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+      goto done;
+    }
+
+  /* enable "1000BASE-T Full-Duplex" in GCTRL */
+  if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_GCTRL, &reg)) != VNET_DEV_OK)
+    goto done;
+  log_debug (dev, "GCTRL was set to 0x%04x", reg);
+  reg |= 0x200;
+  if ((rv = ige_phy_write (vm, dev, IGE_PHY_REG_GCTRL, reg)) != VNET_DEV_OK)
+    goto done;
+
+  if (id->config.phy_type == IGE_PHY_TYPE_GPY211)
+    {
+      /* modify ANEG[7] device register ANEG_MGBT_AN_CTRL[0x20]:
+       *   AB_2G5BT[7]  - 2.5 G BASE-T ability
+       *   FR_2G5BT[5]  - 2.5 G BASE-T Fast Retrain Ability
+       *   FR[1]        - Fast Retrain Ability
+       */
+      rv = ige_phy_mmd_write (vm, dev, 7, 0x20, 0xa2);
+      if (rv != VNET_DEV_OK)
+       goto done;
+    }
+
+done:
+  /* always hand the PHY back, but never let a successful release hide an
+   * earlier failure from the caller */
+  release_rv = ige_phy_release (vm, dev);
+  return rv != VNET_DEV_OK ? rv : release_rv;
+}
diff --git a/src/plugins/dev_ige/port.c b/src/plugins/dev_ige/port.c
new file mode 100644 (file)
index 0000000..c18ca0b
--- /dev/null
@@ -0,0 +1,488 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* File-local log class "ige" / subclass "port"; presumably what the log_*()
+ * macros used in this file resolve to - confirm in ige.h. */
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = {
+  .class_name = "ige",
+  .subclass_name = "port",
+};
+
+/* STATUS speed decode table, indexed by (speed_2p5 << 2 | speed).
+ * Values are in kbps; encodings not listed stay 0, which
+ * ige_port_status_poll() reports as an unknown speed. */
+const u32 link_speeds[8] = {
+  [0] = 10000,   /* 0b000: 10 Mbps */
+  [1] = 100000,  /* 0b001: 100 Mbps */
+  [2] = 1000000, /* 0b010: 1000 Mbps */
+  [6] = 2500000, /* 0b110: 2500 Mbps */
+};
+
+/* Periodic link monitor: reads STATUS, compares it with the value cached in
+ * the port data and reports link state / duplex / speed changes to the
+ * device framework. The cached value is refreshed on every difference. */
+static void
+ige_port_status_poll (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  ige_port_t *ip = vnet_dev_get_port_data (port);
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_port_state_changes_t changes = {};
+  ige_reg_status_t prev, cur;
+
+  ige_reg_rd (dev, IGE_REG_STATUS, &cur.as_u32);
+  prev.as_u32 = ip->last_status.as_u32;
+
+  /* nothing to do while the register is unchanged */
+  if (prev.as_u32 == cur.as_u32)
+    return;
+
+  log_debug (dev, "\n%U", format_ige_reg_diff, IGE_REG_STATUS, prev.as_u32,
+             cur.as_u32);
+
+  if (prev.link_up != cur.link_up)
+    {
+      changes.change.link_state = 1;
+      changes.link_state = cur.link_up;
+      log_debug (dev, "link state changed to %s",
+                 cur.link_up ? "up" : "down");
+    }
+
+  if (prev.full_duplex != cur.full_duplex)
+    {
+      changes.change.link_duplex = 1;
+      changes.full_duplex = cur.full_duplex;
+      log_debug (dev, "duplex changed to %s",
+                 cur.full_duplex ? "full" : "half");
+    }
+
+  if (prev.speed != cur.speed || prev.speed_2p5 != cur.speed_2p5)
+    {
+      /* speed_2p5 is the high bit of the table index */
+      u32 kbps = link_speeds[cur.speed_2p5 << 2 | cur.speed];
+
+      changes.change.link_speed = 1;
+      changes.link_speed = kbps;
+
+      if (kbps == 0)
+        log_warn (dev,
+                  "device reported unknown speed (speed %u speed_2p5 %u)",
+                  cur.speed, cur.speed_2p5);
+      else
+        log_debug (dev, "link speed changed to %u Mbps",
+                   changes.link_speed / 1000);
+    }
+
+  ip->last_status.as_u32 = cur.as_u32;
+
+  if (changes.change.any)
+    vnet_dev_port_state_change (vm, port, changes);
+}
+
+/* Port init callback: sets up the port counters and, once that succeeded,
+ * registers ige_port_status_poll() as a periodic port operation.
+ *
+ * Returns the error from ige_port_counters_init() on failure, in which case
+ * no poller is registered, otherwise VNET_DEV_OK.
+ */
+vnet_dev_rv_t
+ige_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_rv_t rv;
+
+  log_debug (port->dev, "port %u", port->port_id);
+
+  rv = ige_port_counters_init (vm, port);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  /* NOTE(review): third argument is presumably the poll interval - confirm
+   * against vnet_dev_poll_port_add() */
+  vnet_dev_poll_port_add (vm, port, 1, ige_port_status_poll);
+  return VNET_DEV_OK;
+}
+
+/* Port start callback: programs every RX and TX ring into the device, posts
+ * initial RX buffers, then enables the receiver and transmitter and
+ * registers the counter poller.
+ *
+ * Returns 0 on success. If an RX ring cannot be given a single buffer it
+ * returns VNET_DEV_ERR_BUFFER_ALLOC_FAIL after freeing the buffers already
+ * posted and resetting the software ring state of all queues.
+ */
+vnet_dev_rv_t
+ige_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  ige_device_t *id = vnet_dev_get_data (port->dev);
+  ige_rxq_t *iq;
+  ige_txq_t *tq;
+  ige_reg_rctl_t rctl;
+  ige_reg_tctl_t tctl;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  log_debug (dev, "port %u", port->port_id);
+
+  /* rings are reprogrammed below, so force RX off if it is found enabled */
+  ige_reg_rd (dev, IGE_REG_RCTL, &rctl.as_u32);
+  if (rctl.rx_enable)
+    {
+      log_warn (dev, "port %u rx is unexpectedly enabled", port->port_id);
+      rctl.rx_enable = 0;
+      ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32);
+    }
+
+  /* same for TX */
+  ige_reg_rd (dev, IGE_REG_TCTL, &tctl.as_u32);
+  if (tctl.tx_enable)
+    {
+      log_warn (dev, "port %u tx is unexpectedly enabled", port->port_id);
+      tctl.tx_enable = 0;
+      ige_reg_wr (dev, IGE_REG_TCTL, tctl.as_u32);
+    }
+
+  /* per RX queue: ring length/base, buffer layout, queue enable, buffers */
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      const ige_reg_srrctl_t srrctl = {
+       .drop_en = 1,
+       .desc_type = 1,   /* advanced, no header */
+       .bsizepacket = 2, /* 2k */
+                         //.bsizeheader = 2, /* 128 B */
+      };
+
+      const ige_reg_rxdctl_t rxdctl = {
+       .pthresh = 12,
+       .hthresh = 10,
+       .wthresh = 1,
+       .enable = 1,
+      };
+
+      u64 dma_addr;
+      u16 q = rxq->queue_id;
+      ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+      dma_addr = vnet_dev_get_dma_addr (vm, dev, iq->descs);
+      ige_reg_wr (dev, IGE_REG_RDLEN (q), rxq->size * sizeof (ige_rx_desc_t));
+      ige_reg_wr (dev, IGE_REG_RDBAH (q), dma_addr >> 32);
+      ige_reg_wr (dev, IGE_REG_RDBAL (q), dma_addr);
+      ige_reg_wr (dev, IGE_REG_SRRCTL (q), srrctl.as_u32);
+      ige_reg_wr (dev, IGE_REG_RXDCTL (q), rxdctl.as_u32);
+      iq->head = 0;
+      iq->tail = 0;
+      /* cache the RDT register address inside BAR0 for later tail updates */
+      iq->reg_rdt = (u32 *) ((u8 *) id->bar0 + IGE_REG_RDT (q));
+
+      /* Ensure the queue starts with buffers posted. */
+      u16 n_posted = ige_rxq_refill_no_wrap (
+       vm, iq->buffer_indices, iq->descs, rxq->size,
+       vnet_dev_get_rx_queue_buffer_pool_index (rxq), dev->va_dma);
+
+      iq->tail = n_posted;
+
+      /* not even one buffer could be posted: abort the whole start */
+      if (iq->tail == 0)
+       {
+         rv = VNET_DEV_ERR_BUFFER_ALLOC_FAIL;
+         goto error;
+       }
+
+      /* NOTE(review): tail is written unmasked here, while ige_rxq_refill()
+       * writes tail & mask - confirm a value equal to the ring size is
+       * acceptable to the device */
+      __atomic_store_n (iq->reg_rdt, (u32) iq->tail, __ATOMIC_RELEASE);
+    }
+
+  /* per TX queue: ring length/base, head write-back address, queue enable */
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      u64 dma_addr;
+      u64 wb_dma;
+      u16 q = txq->queue_id;
+      ige_reg_txctl_t txctl;
+
+      ige_txq_t *tq = vnet_dev_get_tx_queue_data (txq);
+      ASSERT (tq->wb != 0);
+      dma_addr = vnet_dev_get_dma_addr (vm, dev, tq->descs);
+      wb_dma = vnet_dev_get_dma_addr (vm, dev, tq->wb);
+
+      ige_reg_wr (dev, IGE_REG_TDLEN (q), txq->size * sizeof (ige_tx_desc_t));
+      ige_reg_wr (dev, IGE_REG_TDBAH (q), dma_addr >> 32);
+      ige_reg_wr (dev, IGE_REG_TDBAL (q), dma_addr);
+      ige_reg_wr (dev, IGE_REG_TDWBAH (q), wb_dma >> 32);
+      /* low two address bits are dropped and replaced by the enable flag */
+      ige_reg_wr (dev, IGE_REG_TDWBAL (q),
+                 ((u32) wb_dma & ~0x3u) | IGE_TDWBAL_HEAD_WB_ENABLE);
+
+      /* reset the write-back word and software ring state together */
+      *tq->wb = 0;
+
+      tq->head = tq->tail = 0;
+      tq->reg_tdt = (u32 *) ((u8 *) id->bar0 + IGE_REG_TDT (q));
+
+      ige_reg_wr (dev, IGE_REG_TDH (q), 0);
+      ige_reg_wr (dev, IGE_REG_TDT (q), 0);
+
+      ige_reg_txdctl_t txdctl = {
+       .pthresh = 8,
+       .hthresh = 1,
+       .wthresh = 1,
+       .enable = 1,
+      };
+
+      ige_reg_wr (dev, IGE_REG_TXDCTL (q), txdctl.as_u32);
+      ige_reg_rd (dev, IGE_REG_TXCTL (q), &txctl.as_u32);
+      txctl.tx_desc_wb_relax_order_en = 0;
+      ige_reg_wr (dev, IGE_REG_TXCTL (q), txctl.as_u32);
+    }
+
+  /* all rings are programmed: configure and enable the receiver */
+  rctl.rx_enable = 1;
+  rctl.store_bad_packets = 0;
+  rctl.strip_eth_crc = 1;
+  rctl.long_pkt_reception_ena = 1;
+  rctl.vlan_filter_ena = 0;
+  rctl.bcast_accept_mode = 1;
+  rctl.discard_pause_frames = 1;
+  ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32);
+  ige_reg_wr (dev, IGE_REG_RLPML, port->max_rx_frame_size);
+
+  /* ... and the transmitter */
+  tctl.tx_enable = 1;
+  tctl.pad_short_pkts = 1;
+  ige_reg_wr (dev, IGE_REG_TCTL, tctl.as_u32);
+
+  /* rewrite RX head/tail now that the receiver is enabled */
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      u16 q = rxq->queue_id;
+
+      ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+      ige_reg_wr (dev, IGE_REG_RDH (q), 0);
+      ige_reg_wr (dev, IGE_REG_RDT (q), iq->tail);
+    }
+
+  /* removed again by ige_port_stop() */
+  vnet_dev_poll_port_add (vm, port, 3, ige_port_counter_poll);
+  return 0;
+
+error:
+  /* give back every RX buffer still sitting between head and tail */
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      iq = vnet_dev_get_rx_queue_data (rxq);
+      if (iq->tail)
+       {
+         u16 n_buffers = iq->tail - iq->head;
+         u16 mask = rxq->size - 1;
+         u16 start = iq->head & mask;
+         if (n_buffers)
+           vlib_buffer_free_from_ring_no_next (vm, iq->buffer_indices, start,
+                                               rxq->size, n_buffers);
+       }
+      iq->head = iq->tail = 0;
+    }
+  /* reset TX software state; hardware is only touched for queues whose
+   * reg_tdt has been set */
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      tq = vnet_dev_get_tx_queue_data (txq);
+      if (tq->tail != tq->head)
+       {
+         u16 mask = txq->size - 1;
+         u16 start = tq->head & mask;
+         u16 n_buffers = tq->tail - tq->head;
+
+         if (n_buffers)
+           vlib_buffer_free_from_ring_no_next (vm, tq->buffer_indices, start,
+                                               txq->size, n_buffers);
+       }
+
+      tq->head = tq->tail = 0;
+      if (tq->reg_tdt)
+       {
+         ige_reg_txdctl_t txdctl = {};
+         ige_reg_wr (dev, IGE_REG_TDT (txq->queue_id), 0);
+         ige_reg_rd (dev, IGE_REG_TXDCTL (txq->queue_id), &txdctl.as_u32);
+         txdctl.enable = 0;
+         ige_reg_wr (dev, IGE_REG_TXDCTL (txq->queue_id), txdctl.as_u32);
+         ige_reg_wr (dev, IGE_REG_TDWBAL (txq->queue_id), 0);
+         ige_reg_wr (dev, IGE_REG_TDWBAH (txq->queue_id), 0);
+       }
+    }
+  return rv;
+}
+
+/* Port stop callback: unregisters the counter poller, disables the receiver
+ * and transmitter, frees every buffer still owned by the rings and disables
+ * the TX queues together with their head write-back. */
+void
+ige_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_t *dev = port->dev;
+  ige_reg_rctl_t rx_ctl;
+  ige_reg_tctl_t tx_ctl;
+
+  log_debug (dev, "port %u", port->port_id);
+  vnet_dev_poll_port_remove (vm, port, ige_port_counter_poll);
+
+  /* receiver off */
+  ige_reg_rd (dev, IGE_REG_RCTL, &rx_ctl.as_u32);
+  rx_ctl.rx_enable = 0;
+  ige_reg_wr (dev, IGE_REG_RCTL, rx_ctl.as_u32);
+
+  /* transmitter off */
+  ige_reg_rd (dev, IGE_REG_TCTL, &tx_ctl.as_u32);
+  tx_ctl.tx_enable = 0;
+  ige_reg_wr (dev, IGE_REG_TCTL, tx_ctl.as_u32);
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      ige_rxq_t *rq = vnet_dev_get_rx_queue_data (rxq);
+      u16 ring_mask = rxq->size - 1;
+      u16 n_pending = rq->tail - rq->head;
+
+      if (n_pending != 0)
+        vlib_buffer_free_from_ring_no_next (vm, rq->buffer_indices,
+                                            rq->head & ring_mask, rxq->size,
+                                            n_pending);
+
+      rq->head = rq->tail = 0;
+    }
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      ige_txq_t *tq = vnet_dev_get_tx_queue_data (txq);
+      u16 ring_mask = txq->size - 1;
+      u16 n_pending = tq->tail - tq->head;
+      u16 qid = txq->queue_id;
+
+      if (n_pending != 0)
+        vlib_buffer_free_from_ring_no_next (vm, tq->buffer_indices,
+                                            tq->head & ring_mask, txq->size,
+                                            n_pending);
+
+      tq->head = tq->tail = 0;
+
+      /* queues that never had reg_tdt set need no hardware teardown */
+      if (!tq->reg_tdt)
+        continue;
+
+      ige_reg_txdctl_t txdctl = {};
+      ige_reg_wr (dev, IGE_REG_TDT (qid), 0);
+      ige_reg_rd (dev, IGE_REG_TXDCTL (qid), &txdctl.as_u32);
+      txdctl.enable = 0;
+      ige_reg_wr (dev, IGE_REG_TXDCTL (qid), txdctl.as_u32);
+      ige_reg_wr (dev, IGE_REG_TDWBAL (qid), 0);
+      ige_reg_wr (dev, IGE_REG_TDWBAH (qid), 0);
+    }
+}
+
+/* Set or clear unicast and multicast promiscuous mode in RCTL, then read the
+ * register back for the debug log. Always returns VNET_DEV_OK. */
+static vnet_dev_rv_t
+ige_set_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enabled)
+{
+  ige_reg_rctl_t rx_ctl;
+  vnet_dev_t *dev = port->dev;
+
+  /* read-modify-write so the other RCTL bits are preserved */
+  ige_reg_rd (dev, IGE_REG_RCTL, &rx_ctl.as_u32);
+  rx_ctl.mc_promisc_ena = enabled;
+  rx_ctl.uc_promisc_ena = enabled;
+  ige_reg_wr (dev, IGE_REG_RCTL, rx_ctl.as_u32);
+
+  /* log what the device actually holds after the write */
+  ige_reg_rd (dev, IGE_REG_RCTL, &rx_ctl.as_u32);
+  log_debug (dev, "\n %U", format_ige_reg_read, IGE_REG_RCTL, rx_ctl.as_u32);
+
+  return VNET_DEV_OK;
+}
+
+/* Program the primary MAC address into receive address entry 0 and mark the
+ * entry valid. Always returns VNET_DEV_OK. */
+static vnet_dev_rv_t
+ige_change_primary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+                            const vnet_dev_hw_addr_t *hw_addr)
+{
+  vnet_dev_t *dev = port->dev;
+  ige_receive_addr_t ra = {
+    .av = 1,
+  };
+
+  clib_memcpy (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr));
+
+  /* Write the low dword first. Per the Intel datasheets the Address Valid
+   * bit lives in RAH, so writing RAH last avoids a window in which the entry
+   * is valid with a stale low half. */
+  ige_reg_wr (dev, IGE_REG_RAL (0), ra.ral);
+  ige_reg_wr (dev, IGE_REG_RAH (0), ra.rah);
+
+  log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table,
+             dev);
+  return VNET_DEV_OK;
+}
+
+/* Add a secondary MAC address to the first free receive address entry.
+ *
+ * All 16 entries are scanned: a valid entry holding the same address yields
+ * VNET_DEV_ERR_ALREADY_EXISTS. Entry 0 is never picked as the free slot;
+ * ige_change_primary_hw_addr() writes the primary address there. If no
+ * entry in 1..15 is free, VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE is returned.
+ */
+static vnet_dev_rv_t
+ige_add_secondary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+                           const vnet_dev_hw_addr_t *hw_addr)
+{
+  vnet_dev_t *dev = port->dev;
+  ige_receive_addr_t ra;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u32 empty_slot = 0; /* 0 means "none found yet" */
+
+  for (u32 i = 0; i < 16; i++)
+    {
+      ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah);
+      ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral);
+
+      /* an invalid entry is a candidate slot and must not count as a
+       * duplicate, whatever stale address bytes it still holds */
+      if (ra.av == 0)
+        {
+          if (empty_slot == 0 && i > 0)
+            empty_slot = i;
+          continue;
+        }
+
+      if (memcmp (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)) == 0)
+        {
+          log_err (dev, "address %U already exists in table",
+                   format_ethernet_address, hw_addr->eth_mac);
+          rv = VNET_DEV_ERR_ALREADY_EXISTS;
+          goto done;
+        }
+    }
+
+  if (empty_slot == 0)
+    {
+      log_err (dev, "failed to add secondary hw addr %U, table full",
+               format_ethernet_address, hw_addr->eth_mac);
+      rv = VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE;
+      goto done;
+    }
+
+  ra = (ige_receive_addr_t){ .av = 1 };
+  clib_memcpy (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr));
+  /* low dword first, so the entry only becomes valid once it is complete
+   * (per the Intel datasheets the Address Valid bit is part of RAH) */
+  ige_reg_wr (dev, IGE_REG_RAL (empty_slot), ra.ral);
+  ige_reg_wr (dev, IGE_REG_RAH (empty_slot), ra.rah);
+
+done:
+  log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table,
+             dev);
+  return rv;
+}
+
+/* Remove a secondary MAC address from the receive address table.
+ *
+ * Entries 1..15 are searched; entry 0 is skipped because
+ * ige_change_primary_hw_addr() keeps the primary address there. Only valid
+ * entries can match. Returns VNET_DEV_OK when the entry was cleared, or
+ * VNET_DEV_ERR_NOT_FOUND otherwise.
+ */
+static vnet_dev_rv_t
+ige_remove_secondary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+                              const vnet_dev_hw_addr_t *hw_addr)
+{
+  vnet_dev_t *dev = port->dev;
+  ige_receive_addr_t ra;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  for (u32 i = 1; i < 16; i++)
+    {
+      ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah);
+      ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral);
+
+      /* skip invalid entries: their address bytes are meaningless and would
+       * otherwise let e.g. an all-zero address "match" an empty slot */
+      if (ra.av == 0)
+        continue;
+
+      if (memcmp (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)) == 0)
+        {
+          /* RAH first, so the entry is zeroed starting with the high dword */
+          ige_reg_wr (dev, IGE_REG_RAH (i), 0);
+          ige_reg_wr (dev, IGE_REG_RAL (i), 0);
+          goto done;
+        }
+    }
+
+  log_err (dev, "failed to remove secondary hw addr %U, not found",
+           format_ethernet_address, hw_addr->eth_mac);
+  rv = VNET_DEV_ERR_NOT_FOUND;
+
+done:
+  log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table,
+             dev);
+  return rv;
+}
+
+/* Decide whether a port configuration change request can be applied.
+ *
+ * Returns VNET_DEV_OK for requests ige_port_cfg_change() can carry out,
+ * VNET_DEV_ERR_PORT_STARTED for a max RX frame size change on a started
+ * port, and VNET_DEV_ERR_NOT_SUPPORTED for every other request type.
+ */
+vnet_dev_rv_t
+ige_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+                              vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+      /* the frame size is written to the device by ige_port_start(), so the
+       * change is fine on a stopped port and refused on a running one */
+      rv = port->started ? VNET_DEV_ERR_PORT_STARTED : VNET_DEV_OK;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      rv = VNET_DEV_OK;
+      break;
+
+    default:
+      break;
+    }
+
+  return rv;
+}
+
+/* Apply a port configuration change by dispatching on the request type.
+ * A max RX frame size change needs no action here; unknown request types
+ * yield VNET_DEV_ERR_NOT_SUPPORTED. */
+vnet_dev_rv_t
+ige_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                     vnet_dev_port_cfg_change_req_t *req)
+{
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+      /* nothing to program at this point */
+      return VNET_DEV_OK;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      return ige_set_promisc_mode (vm, port, req->promisc);
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      return ige_change_primary_hw_addr (vm, port, &req->addr);
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      return ige_add_secondary_hw_addr (vm, port, &req->addr);
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      return ige_remove_secondary_hw_addr (vm, port, &req->addr);
+
+    default:
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+}
diff --git a/src/plugins/dev_ige/queue.c b/src/plugins/dev_ige/queue.c
new file mode 100644 (file)
index 0000000..895c79d
--- /dev/null
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* File-local log class "ige" / subclass "queue"; presumably what the
+ * log_*() macros used in this file resolve to - confirm in ige.h. */
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = {
+  .class_name = "ige",
+  .subclass_name = "queue",
+};
+
+/* RX queue alloc callback: claims the lowest free hardware queue id from
+ * avail_rxq_bmp, allocates the zeroed buffer-index ring and the DMA
+ * descriptor ring. On failure everything acquired so far is released and
+ * the queue id is returned to the bitmap. */
+vnet_dev_rv_t
+ige_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_t *dev = rxq->port->dev;
+  ige_device_t *id = vnet_dev_get_data (dev);
+  vnet_dev_rv_t rv;
+
+  if (id->avail_rxq_bmp == 0)
+    {
+      log_err (dev, "no free RX queues (requested size %u)", rxq->size);
+      return VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE;
+    }
+
+  /* take the lowest available queue id out of the bitmap */
+  rxq->queue_id = get_lowest_set_bit_index (id->avail_rxq_bmp);
+  id->avail_rxq_bmp ^= 1 << rxq->queue_id;
+
+  iq->buffer_indices = clib_mem_alloc_aligned (
+    rxq->size * sizeof (iq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+
+  if (iq->buffer_indices == 0)
+    {
+      rv = VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE;
+      log_err (dev, "queue %u buffer ring alloc failed (ring size %u)",
+               rxq->queue_id, rxq->size);
+      goto release_queue_id;
+    }
+
+  clib_memset_u32 (iq->buffer_indices, 0, rxq->size);
+
+  rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ige_rx_desc_t) * rxq->size, 0,
+                               (void **) &iq->descs);
+  if (rv == VNET_DEV_OK)
+    {
+      log_debug (dev, "rx queue %u allocated (size %u)", rxq->queue_id,
+                 rxq->size);
+      return rv;
+    }
+
+  /* descriptor ring failed: undo the buffer-index ring */
+  clib_mem_free (iq->buffer_indices);
+  iq->buffer_indices = 0;
+  log_err (dev, "queue %u DMA descriptor alloc failed (rv %d)", rxq->queue_id,
+           rv);
+
+release_queue_id:
+  id->avail_rxq_bmp |= 1 << rxq->queue_id;
+  return rv;
+}
+
+/* TX queue alloc callback: claims the lowest free hardware queue id from
+ * avail_txq_bmp and allocates the buffer-index ring, the DMA descriptor
+ * ring and the cache-line sized head write-back area.
+ *
+ * On failure every resource acquired so far is freed, the pointers are
+ * cleared so that a later ige_tx_queue_free() cannot free them again, and
+ * the queue id is returned to the bitmap.
+ */
+vnet_dev_rv_t
+ige_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_t *dev = txq->port->dev;
+  ige_device_t *id = vnet_dev_get_data (dev);
+  ige_txq_t *iq = vnet_dev_get_tx_queue_data (txq);
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  if (id->avail_txq_bmp == 0)
+    {
+      log_err (dev, "no free TX queues (requested size %u)", txq->size);
+      return VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE;
+    }
+  txq->queue_id = get_lowest_set_bit_index (id->avail_txq_bmp);
+  id->avail_txq_bmp ^= 1 << txq->queue_id;
+  iq->buffer_indices = clib_mem_alloc_aligned (
+    txq->size * sizeof (iq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+
+  if (iq->buffer_indices == 0)
+    {
+      rv = VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE;
+      goto done;
+    }
+
+  rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ige_tx_desc_t) * txq->size, 0,
+                               (void **) &iq->descs);
+
+  if (rv != VNET_DEV_OK)
+    goto done;
+
+  /* head write-back area, one cache line, cache-line aligned */
+  rv = vnet_dev_dma_mem_alloc (vm, dev, CLIB_CACHE_LINE_BYTES,
+                               CLIB_CACHE_LINE_BYTES, (void **) &iq->wb);
+
+  if (rv != VNET_DEV_OK)
+    goto done;
+
+  log_debug (dev, "tx queue %u allocated (size %u)", txq->queue_id, txq->size);
+
+done:
+  if (rv != VNET_DEV_OK)
+    {
+      if (iq->wb)
+        vnet_dev_dma_mem_free (vm, dev, iq->wb);
+      if (iq->descs)
+        vnet_dev_dma_mem_free (vm, dev, iq->descs);
+      if (iq->buffer_indices)
+        clib_mem_free (iq->buffer_indices);
+
+      /* do not leave dangling pointers behind in the queue data */
+      iq->wb = 0;
+      iq->descs = 0;
+      iq->buffer_indices = 0;
+
+      id->avail_txq_bmp |= 1 << txq->queue_id;
+      log_err (dev, "queue %u allocation failed (rv %d)", txq->queue_id, rv);
+    }
+
+  return rv;
+}
+
+/* RX queue free callback: returns the queue id to avail_rxq_bmp and releases
+ * the descriptor ring and the buffer-index ring, clearing both pointers. */
+void
+ige_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+  vnet_dev_t *dev = rxq->port->dev;
+  ige_device_t *id = vnet_dev_get_data (dev);
+
+  /* make the hardware queue id available again */
+  id->avail_rxq_bmp |= 1 << rxq->queue_id;
+
+  vnet_dev_dma_mem_free (vm, dev, iq->descs);
+  iq->descs = 0;
+
+  if (iq->buffer_indices == 0)
+    return;
+
+  clib_mem_free (iq->buffer_indices);
+  iq->buffer_indices = 0;
+}
+
+/* TX queue free callback: returns the queue id to avail_txq_bmp and releases
+ * the descriptor ring, the buffer-index ring and the head write-back area.
+ * Each pointer is cleared after being freed, as ige_rx_queue_free() does, so
+ * a repeated call cannot free the same memory twice. */
+void
+ige_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_t *dev = txq->port->dev;
+  ige_device_t *id = vnet_dev_get_data (dev);
+  ige_txq_t *iq = vnet_dev_get_tx_queue_data (txq);
+
+  id->avail_txq_bmp |= 1 << txq->queue_id;
+
+  if (iq->descs)
+    {
+      vnet_dev_dma_mem_free (vm, dev, iq->descs);
+      iq->descs = 0;
+    }
+
+  if (iq->buffer_indices)
+    {
+      clib_mem_free (iq->buffer_indices);
+      iq->buffer_indices = 0;
+    }
+
+  if (iq->wb)
+    {
+      vnet_dev_dma_mem_free (vm, dev, iq->wb);
+      iq->wb = 0;
+    }
+}
diff --git a/src/plugins/dev_ige/reg.c b/src/plugins/dev_ige/reg.c
new file mode 100644 (file)
index 0000000..8f0f161
--- /dev/null
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+/* File-local log class "ige" / subclass "reg"; presumably what the log_*()
+ * macros used in this file resolve to - confirm in ige.h. */
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = {
+  .class_name = "ige",
+  .subclass_name = "reg",
+};
+
+/* Poll a register until (value & mask) == match or the time budget is used.
+ *
+ * The process is suspended between reads, starting with initial_delay and
+ * doubling each round, for as long as the accumulated delay stays below
+ * timeout. The register is sampled one final time after the last suspend.
+ *
+ * Returns 1 when the register matched and 0 on timeout. Note that despite
+ * the vnet_dev_rv_t return type this is a boolean result.
+ */
+vnet_dev_rv_t
+ige_reg_poll (vlib_main_t *vm, vnet_dev_t *dev, u32 reg, u32 mask, u32 match,
+              f64 initial_delay, f64 timeout)
+{
+  f64 t0 = vlib_time_now (vm);
+  u32 val;
+
+  for (f64 delay = initial_delay, total_time = delay; total_time < timeout;
+       delay *= 2, total_time += delay)
+    {
+      ige_reg_rd (dev, reg, &val);
+      if ((val & mask) == match)
+        goto matched;
+      vlib_process_suspend (vm, delay);
+    }
+
+  /* the loop exits right after a suspend (or without a single read when
+   * initial_delay >= timeout), so check the register one last time */
+  ige_reg_rd (dev, reg, &val);
+  if ((val & mask) == match)
+    goto matched;
+
+  log_debug (dev, "reg %05x timeout", reg);
+  return 0;
+
+matched:
+  log_debug (dev, "reg %05x (suspend %.6f)", reg, vlib_time_now (vm) - t0);
+  return 1;
+}
+
+/* Give up the SWSM semaphore: clears both the smbi and swesmbi bits with a
+ * read-modify-write of the SWSM register. */
+void
+ige_reg_sw_fw_sync_release (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ige_reg_swsm_t sem;
+
+  log_debug (dev, "");
+
+  ige_reg_rd (dev, IGE_REG_SWSM, &sem.as_u32);
+  sem.swesmbi = 0;
+  sem.smbi = 0;
+  ige_reg_wr (dev, IGE_REG_SWSM, sem.as_u32);
+}
+
+/* Take the SWSM semaphore in two stages: wait for smbi to read as 0, then
+ * set swesmbi and confirm it sticks.
+ *
+ * Returns 1 when acquired, 0 when either stage times out. On a stage-two
+ * timeout both bits are cleared again before returning.
+ */
+int
+ige_reg_sw_fw_sync_acquire (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ige_reg_swsm_t swsm;
+  int i, timeout = 10;
+
+  log_debug (dev, "");
+  /* stage 1: up to 2 * timeout reads of smbi, suspending 50 us in between */
+  for (i = 0; i < timeout * 2; i++)
+    {
+      ige_reg_rd (dev, IGE_REG_SWSM, &swsm.as_u32);
+      if (swsm.smbi == 0)
+       break;
+
+      /* half-way through, force both bits clear once and keep polling */
+      if (i == timeout - 1)
+       {
+         log_debug (dev, "timeout, attempt to clear SWSM");
+         swsm.smbi = 0;
+         swsm.swesmbi = 0;
+         ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32);
+       }
+      vlib_process_suspend (vm, 5e-5);
+    }
+
+  if (i == timeout * 2)
+    {
+      log_debug (dev, "timeout acquiring SWSM");
+      return 0;
+    }
+
+  /* stage 2: set swesmbi and read it back until the device reports it set */
+  for (i = 0; i < timeout; i++)
+    {
+      swsm.swesmbi = 1;
+      ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32);
+      ige_reg_rd (dev, IGE_REG_SWSM, &swsm.as_u32);
+      if (swsm.swesmbi == 1)
+       break;
+      vlib_process_suspend (vm, 5e-5);
+    }
+
+  if (i == timeout)
+    {
+      /* back out completely so nothing is left half-held */
+      swsm.smbi = 0;
+      swsm.swesmbi = 0;
+      ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32);
+      log_debug (dev, "timeout acquiring SWSMBI");
+      return 0;
+    }
+
+  log_debug (dev, "acquired");
+  return 1;
+}
diff --git a/src/plugins/dev_ige/rx_node.c b/src/plugins/dev_ige/rx_node.c
new file mode 100644 (file)
index 0000000..6c8680b
--- /dev/null
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include "vppinfra/clib.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/devices/devices.h>
+#include <dev_ige/ige.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Post fresh buffers to an RX ring and publish the new tail to the device.
+ *
+ * head and tail are free-running u16 counters; the ring slot is obtained by
+ * masking with size - 1 (assumes the ring size is a power of two - TODO
+ * confirm where the size is validated). Nothing is done while fewer than 8
+ * slots are free. use_va_dma is forwarded to ige_rxq_refill_no_wrap().
+ */
+static_always_inline void
+ige_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, int use_va_dma)
+{
+  ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+  u16 n, off, n_before_wrap, size, mask, n_refill, tail;
+  u8 buffer_pool_index = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+
+  tail = iq->tail;
+  size = rxq->size;
+
+  /* number of slots not currently holding a posted buffer */
+  n_refill = iq->head + size - tail;
+
+  if (n_refill < 8)
+    return;
+
+  mask = size - 1;
+  off = tail & mask;
+  n_before_wrap = size - off;
+  n = clib_min (n_refill, n_before_wrap);
+
+  /* first chunk: from the tail slot up to the end of the ring at most */
+  n = ige_rxq_refill_no_wrap (vm, iq->buffer_indices + off, iq->descs + off, n,
+                             buffer_pool_index, use_va_dma);
+  tail += n;
+
+  /* second chunk from slot 0, only when the first one reached the ring end */
+  if (n == n_before_wrap)
+    tail += ige_rxq_refill_no_wrap (vm, iq->buffer_indices, iq->descs,
+                                   n_refill - n_before_wrap,
+                                   buffer_pool_index, use_va_dma);
+
+  /* tell the device only when something was actually posted */
+  if (iq->tail != tail)
+    {
+      __atomic_store_n (iq->reg_rdt, tail & mask, __ATOMIC_RELEASE);
+      iq->tail = tail;
+    }
+}
+
+/* Dequeue up to 64 completed descriptors / max_pkts packets from an RX ring.
+ *
+ * Completed descriptors and their buffer indices are copied to local arrays,
+ * trailing descriptors of a packet that is not complete yet are dropped, and
+ * iq->head is advanced over what was consumed. Every buffer gets template bt
+ * and its length from the descriptor; multi-descriptor packets are chained.
+ * The head buffer index of each packet is written to 'to', *n_rx_bytes is
+ * increased by all descriptor lengths and *n_trace is decremented for every
+ * trace record added.
+ *
+ * Returns the number of packets written to 'to' (0 if none is complete).
+ */
+static_always_inline u64
+ige_rx_deq_64_desc (vlib_main_t *vm, vlib_node_runtime_t *node,
+                   vnet_dev_rx_queue_t *rxq, vlib_buffer_template_t bt,
+                   u32 *to, u32 max_pkts, u32 *n_rx_bytes, u32 *n_trace)
+{
+  u16 mask = rxq->size - 1;
+  ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+  u16 head = iq->head;
+  u16 slot = head & mask;
+  ige_rx_desc_t dc[64], *descs = iq->descs, *d = descs + slot;
+  u32 bc[64], *buffer_indices = iq->buffer_indices,
+             *bi = buffer_indices + slot;
+  vlib_buffer_t *buffers[64];
+  u32 n_descs = 0, n_pkts = 0;
+
+  /* snapshot descriptors while dd is set; eop marks the end of a packet */
+  while (d->dd && n_descs < ARRAY_LEN (dc) && n_pkts < max_pkts)
+    {
+      dc[n_descs] = *d;
+      bc[n_descs] = *bi;
+      n_pkts += d->eop;
+      n_descs++;
+      slot = (slot + 1) & mask;
+      d = descs + slot;
+      bi = buffer_indices + slot;
+    }
+
+  if (n_pkts == 0)
+    return 0;
+
+  /* remove descriptors from incomplete packets */
+  while (dc[n_descs - 1].eop == 0)
+    n_descs--;
+
+  /* advance head */
+  iq->head += n_descs;
+
+  vlib_get_buffers (vm, bc, buffers, n_descs);
+
+  for (int i = 0; i < n_descs; i++)
+    {
+      u32 len = dc[i].pkt_len;
+      buffers[i]->template = bt;
+      buffers[i]->current_length = len;
+      *n_rx_bytes += len;
+    }
+
+  /* fewer packets than descriptors: at least one packet spans buffers */
+  if (n_pkts < n_descs)
+    {
+      u32 hi = 0;    /* head index */
+      u32 tlnif = 0; /* total length not including first buffer */
+
+      for (int i = 0; i < n_descs; i++)
+       {
+         /* not the first buffer of its packet: link it to the previous one */
+         if (i > hi)
+           {
+             buffers[i - 1]->next_buffer = bc[i];
+             buffers[i - 1]->flags |= VLIB_BUFFER_NEXT_PRESENT;
+             tlnif += dc[i].pkt_len;
+           }
+         /* packet complete: emit its head buffer, start the next packet */
+         if (dc[i].eop)
+           {
+             to++[0] = bc[hi];
+             if (tlnif)
+               {
+                 buffers[hi]->total_length_not_including_first_buffer = tlnif;
+                 buffers[hi]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+                 tlnif = 0;
+               }
+             hi = i + 1;
+           }
+       }
+    }
+  else
+    /* one descriptor per packet: buffer indices can be copied as they are */
+    vlib_buffer_copy_indices (to, bc, n_pkts);
+
+  /* NOTE(review): this walks every descriptor's buffer, so non-head buffers
+   * of a chained packet can be traced too - confirm that is intended */
+  if (PREDICT_FALSE (*n_trace))
+    for (u32 i = 0; i<n_descs && * n_trace> 0; i++)
+      {
+       vlib_buffer_t *b = buffers[i];
+       u32 next_index, hw_if_index;
+
+       if (b == 0)
+         continue;
+
+       next_index = vnet_dev_get_rx_queue_if_next_index (rxq);
+       hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq);
+
+       if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b, 0)))
+         {
+           ige_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+           tr->next_index = next_index;
+           tr->hw_if_index = hw_if_index;
+           tr->queue_id = rxq->queue_id;
+           tr->buffer_index = bc[i];
+           tr->desc = dc[i];
+           (*n_trace)--;
+         }
+      }
+
+  return n_pkts;
+}
+
+/* Receive packets from one RX queue into a new next-node frame.
+ *
+ * Returns 0 without touching the frame machinery unless at least one
+ * complete packet (dd set up to and including an eop descriptor) is waiting.
+ * Otherwise it dequeues in batches via ige_rx_deq_64_desc(), updates the
+ * trace count and the interface RX counter, and returns the packet count.
+ */
+static_always_inline u32
+ige_rx_one_queue (vlib_main_t *vm, vlib_node_runtime_t *node,
+                 vnet_dev_rx_queue_t *rxq)
+{
+  ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq);
+  u16 next_index = vnet_dev_get_rx_queue_if_next_index (rxq);
+  vlib_buffer_template_t bt;
+  vnet_main_t *vnm;
+  u32 n_trace, sw_if_index, n_rx = 0, n_rx_bytes = 0, *to_next, n;
+  uword n_left_to_next;
+  u16 mask = rxq->size - 1;
+  u16 slot = iq->head & mask;
+  ige_rx_desc_t *d = iq->descs + slot;
+
+  if (d->dd == 0)
+    return 0;
+
+  /* look ahead for the end of the first packet; bail out if it is not
+   * completely written yet */
+  while (d->eop == 0)
+    {
+      slot = (slot + 1) & mask;
+      d = iq->descs + slot;
+      if (d->dd == 0)
+       return 0;
+    }
+
+  bt = vnet_dev_get_rx_queue_if_buffer_template (rxq);
+
+  vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+  n_trace = vlib_get_trace_count (vm, node);
+
+  /* full batches while the frame has room for 64 more packets */
+  while (n_left_to_next >= 64)
+    {
+      n = ige_rx_deq_64_desc (vm, node, rxq, bt, to_next, 64, &n_rx_bytes,
+                             &n_trace);
+
+      n_rx += n;
+
+      to_next += n;
+      n_left_to_next -= n;
+      /* a short batch is taken as "ring drained"
+       * NOTE(review): the reason for the 64 - 3 threshold is not stated */
+      if (n < (64 - 3))
+       goto rxq_empty;
+    }
+
+  /* final partial batch, limited to the space left in the frame */
+  if (n_left_to_next > 0)
+    {
+      n = ige_rx_deq_64_desc (vm, node, rxq, bt, to_next, n_left_to_next,
+                             &n_rx_bytes, &n_trace);
+
+      n_rx += n;
+
+      to_next += n;
+      n_left_to_next -= n;
+    }
+
+rxq_empty:
+
+  vlib_set_trace_count (vm, node, n_trace);
+  sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq);
+
+  /* going to ethernet-input: mark the frame as single-interface and fill in
+   * the frame scalar args */
+  if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+    {
+      vlib_next_frame_t *nf;
+      vlib_frame_t *f;
+      ethernet_input_frame_t *ef;
+      nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+      f = vlib_get_frame (vm, nf->frame);
+      f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+      ef = vlib_frame_scalar_args (f);
+      ef->sw_if_index = sw_if_index;
+      ef->hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq);
+
+      // if ((or_qw1 & mask_ipe.as_u64) == 0) f->flags |=
+      // ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+      vlib_frame_no_append (f);
+    }
+
+  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+  vnm = vnet_get_main ();
+  vlib_increment_combined_counter (
+    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+    vm->thread_index, sw_if_index, n_rx, n_rx_bytes);
+
+  return n_rx;
+}
+
+/* RX node function: services every RX queue assigned to this node runtime,
+ * refilling each ring right after it was drained. Returns the total number
+ * of packets received. */
+VNET_DEV_NODE_FN (ige_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  uint32_t n_rx_total = 0;
+
+  foreach_vnet_dev_rx_queue_runtime (rxq, node)
+    {
+      n_rx_total += ige_rx_one_queue (vm, node, rxq);
+
+      /* refill RX queue; literal flag values are kept on purpose, presumably
+       * so each always-inline copy is specialized for a constant */
+      if (rxq->port->dev->va_dma)
+        ige_rxq_refill (vm, rxq, /*use_va_dma */ 1);
+      else
+        ige_rxq_refill (vm, rxq, /*use_va_dma */ 0);
+    }
+
+  return n_rx_total;
+}
diff --git a/src/plugins/dev_ige/tx_node.c b/src/plugins/dev_ige/tx_node.c
new file mode 100644 (file)
index 0000000..e85250d
--- /dev/null
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Damjan Marion
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_ige/ige.h>
+
+/*
+ * Write one TX descriptor for buffer `b` into ring slot (*tail & mask),
+ * store buffer index `bi` in the same slot of `buffer_indices` and advance
+ * *tail by one.
+ *
+ * first  - non-zero for the first buffer of a packet; only then is the
+ *          paylen field filled in (whole-chain length unless `last` is set
+ *          too, in which case just this buffer's length).
+ * last   - non-zero for the final buffer of a packet; sets eop and rs.
+ * use_va - non-zero: descriptor address is the buffer's virtual address,
+ *          zero: its physical address.
+ * trace  - non-zero: add a trace record when the buffer is flagged
+ *          VLIB_BUFFER_IS_TRACED.
+ */
+static_always_inline void
+ige_enq_txd (vlib_main_t *vm, vlib_node_runtime_t *n, vnet_dev_tx_queue_t *txq,
+             vlib_buffer_t *b, u32 bi, int first, int last,
+             ige_tx_desc_t *descs, u32 *buffer_indices, u16 *tail, u16 mask,
+             int use_va, int trace)
+{
+  const u32 data_len = b->current_length;
+  const u32 ring_slot = *tail & mask;
+  const int is_last = last != 0;
+  ige_tx_desc_t desc = {
+    .eop = is_last,
+    .rs = is_last,
+    .ifcs = 1,
+    .dtyp = 0x3,
+    .dtalen = data_len,
+  };
+
+  if (use_va)
+    desc.addr = vlib_buffer_get_current_va (b);
+  else
+    desc.addr = vlib_buffer_get_current_pa (vm, b);
+
+  if (first)
+    {
+      u32 payload_len = data_len;
+
+      /* head of a multi-buffer packet: account for the rest of the chain */
+      if (!last)
+        payload_len += b->total_length_not_including_first_buffer;
+
+      desc.paylen = payload_len;
+    }
+
+  if (trace && (b->flags & VLIB_BUFFER_IS_TRACED))
+    {
+      ige_tx_trace_t *tr = vlib_add_trace (vm, n, b, sizeof (*tr));
+
+      tr->desc = desc;
+      tr->hw_if_index = vnet_dev_get_tx_queue_if_hw_if_index (txq);
+      tr->queue_id = txq->queue_id;
+      tr->buffer_index = bi;
+    }
+
+  descs[ring_slot] = desc;
+  buffer_indices[ring_slot] = bi;
+  *tail += 1;
+}
+
+/*
+ * Release the buffers of TX descriptors that are no longer in use.
+ *
+ * Loads the value stored at itq->wb (presumably the head position written
+ * back by the device - confirm against the queue setup code), takes its
+ * distance from the software head modulo the ring size, rounds that down
+ * to a multiple of 16 and frees that many buffers from the ring starting
+ * at the software head slot. The software head is then advanced by the
+ * same amount.
+ *
+ * NOTE(review): the distance is masked with (size - 1), so a ring holding
+ * exactly `size` completed descriptors evaluates to 0 here; worth
+ * confirming that the enqueue side never lets the ring get that full.
+ */
+static_always_inline void
+ige_txq_complete (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  ige_txq_t *const q = vnet_dev_get_tx_queue_data (txq);
+  const u16 sw_head = q->head;
+  const u16 sw_tail = q->tail;
+
+  /* nothing outstanding on the ring */
+  if (sw_tail == sw_head)
+    return;
+
+  const u32 wb_head = __atomic_load_n (q->wb, __ATOMIC_ACQUIRE);
+  const u16 ring_mask = txq->size - 1;
+  const u16 n_done = (wb_head - sw_head) & ring_mask & 0xfff0;
+
+  if (n_done == 0)
+    return;
+
+  vlib_buffer_free_from_ring_no_next (vm, q->buffer_indices,
+                                      sw_head & ring_mask, txq->size, n_done);
+  q->head = sw_head + n_done;
+}
+
+/*
+ * Enqueue packets from `from` onto the TX descriptor ring.
+ *
+ * Completed descriptors are reclaimed first. Each packet then takes one
+ * descriptor per buffer; a packet may consist of a head buffer plus at most
+ * 4 chained buffers. Packets with longer chains are freed and counted as
+ * IGE_TX_NODE_CTR_BUFFER_CHAIN_TOO_LONG. Enqueueing stops when max_pkts
+ * packets have been handled or the next packet does not fit in the ring.
+ * If any descriptor was added, the new tail is written to the tail
+ * register.
+ *
+ * va - non-zero: use buffer virtual addresses in descriptors.
+ * tr - non-zero: emit trace records for traced buffers.
+ *
+ * Returns the number of packets consumed from `from` (enqueued plus those
+ * dropped because of a too long chain).
+ */
+static_always_inline u32
+ige_txq_enq (vlib_main_t *vm, vlib_node_runtime_t *node,
+             vnet_dev_tx_queue_t *txq, u32 *from, u32 max_pkts, int va, int tr)
+{
+  ige_txq_t *const itq = vnet_dev_get_tx_queue_data (txq);
+  ige_tx_desc_t *const d = itq->descs;
+  u32 *const bi = itq->buffer_indices;
+  const u16 size = txq->size;
+  const u16 mask = size - 1;
+  /* ige_txq_complete () derives the completed count modulo `size`, so it
+   * cannot tell a completely full ring from an empty one; always keep one
+   * slot unused. */
+  const u16 max_in_flight = size - 1;
+  u16 n_pkts = 0;
+  u32 drop_too_long[VLIB_FRAME_SIZE], n_drop_too_long = 0;
+
+  ige_txq_complete (vm, txq);
+
+  const u16 head = itq->head;
+  u16 tail = itq->tail;
+
+  while (n_pkts < max_pkts)
+    {
+      /* head and tail are free-running 16-bit counters, so occupancy has
+       * to be computed modulo 2^16; comparing tail against head + size
+       * directly breaks once head + size exceeds 65535. */
+      const u16 n_used = tail - head;
+
+      if (n_used >= max_in_flight)
+        break;
+
+      const u16 n_avail = max_in_flight - n_used;
+      u32 hbi = from[n_pkts];
+      vlib_buffer_t *b = vlib_get_buffer (vm, hbi);
+      u32 i;
+
+      if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+        {
+          u32 tbi[4] = {
+            [0] = b->next_buffer,
+          };
+          vlib_buffer_t *tb[4] = {
+            [0] = vlib_get_buffer (vm, b->next_buffer),
+          };
+          u32 n = 1;
+
+          /* collect the chained buffers following the head */
+          while (tb[n - 1]->flags & VLIB_BUFFER_NEXT_PRESENT)
+            {
+              if (n >= ARRAY_LEN (tbi))
+                {
+                  /* chain too long: drop the packet, still consume it */
+                  drop_too_long[n_drop_too_long++] = hbi;
+                  goto next;
+                }
+
+              tbi[n] = tb[n - 1]->next_buffer;
+              tb[n] = vlib_get_buffer (vm, tbi[n]);
+              n++;
+            }
+
+          /* head + n chained buffers must fit as a whole */
+          if (n + 1 > n_avail)
+            break;
+
+          ige_enq_txd (vm, node, txq, b, hbi, 1, 0, d, bi, &tail, mask, va,
+                       tr);
+          for (i = 0; i + 1 < n; i++)
+            ige_enq_txd (vm, node, txq, tb[i], tbi[i], 0, 0, d, bi, &tail,
+                         mask, va, tr);
+          /* i == n - 1 here: the final buffer closes the packet */
+          ige_enq_txd (vm, node, txq, tb[i], tbi[i], 0, 1, d, bi, &tail, mask,
+                       va, tr);
+        }
+      else
+        ige_enq_txd (vm, node, txq, b, hbi, 1, 1, d, bi, &tail, mask, va, tr);
+
+    next:
+      n_pkts++;
+    }
+
+  if (n_drop_too_long)
+    {
+      vlib_error_count (vm, node->node_index,
+                        IGE_TX_NODE_CTR_BUFFER_CHAIN_TOO_LONG,
+                        n_drop_too_long);
+      vlib_buffer_free (vm, drop_too_long, n_drop_too_long);
+    }
+
+  if (itq->tail != tail)
+    {
+      /* release ordering: descriptor writes above must be visible before
+       * the tail register is updated */
+      __atomic_store_n (itq->reg_tdt, tail & mask, __ATOMIC_RELEASE);
+      itq->tail = tail;
+    }
+
+  return n_pkts;
+}
+
+/*
+ * TX node function: enqueues the frame's buffers on this node's TX queue.
+ *
+ * The queue is locked if needed, then up to two enqueue attempts are made;
+ * the second attempt only happens when the first one made partial
+ * progress. Buffers that still could not be enqueued are freed and counted
+ * as IGE_TX_NODE_CTR_NO_FREE_SLOTS.
+ *
+ * Returns the number of buffers consumed by ige_txq_enq ().
+ */
+VNET_DEV_NODE_FN (ige_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+  vnet_dev_tx_queue_t *txq = rt->tx_queue;
+  vnet_dev_t *dev = txq->port->dev;
+  u32 *from = vlib_frame_vector_args (frame);
+  u16 n, n_left;
+  int n_retries = 2;
+
+  n_left = frame->n_vectors;
+
+  vnet_dev_tx_queue_lock_if_needed (txq);
+
+  while (n_retries--)
+    {
+      /* the non-traced calls pass literal flags to ige_txq_enq () */
+      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+        n = ige_txq_enq (vm, node, txq, from, n_left, dev->va_dma != 0, 1);
+      else if (dev->va_dma)
+        n = ige_txq_enq (vm, node, txq, from, n_left, 1, 0);
+      else
+        n = ige_txq_enq (vm, node, txq, from, n_left, 0, 0);
+
+      from += n;
+      n_left -= n;
+
+      /* stop when no progress was made or when everything has been sent;
+       * n_left is already reduced by n at this point */
+      if (n == 0 || n_left == 0)
+        break;
+    }
+
+  if (n_left)
+    {
+      /* ring still full: drop the remainder and account for it */
+      vlib_buffer_free (vm, from, n_left);
+      vlib_error_count (vm, node->node_index, IGE_TX_NODE_CTR_NO_FREE_SLOTS,
+                        n_left);
+    }
+
+  vnet_dev_tx_queue_unlock_if_needed (txq);
+
+  return frame->n_vectors - n_left;
+}