From: Damjan Marion Date: Thu, 2 Oct 2025 12:01:39 +0000 (+0200) Subject: ige: native driver for Intel Gigabit Adapters (i211, i225, i226) X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F81%2F41081%2F16;p=vpp.git ige: native driver for Intel Gigabit Adapters (i211, i225, i226) Type: feature Change-Id: I79bd1111fdfc777843de917ed061c8e818e20d2e Signed-off-by: Damjan Marion --- diff --git a/MAINTAINERS b/MAINTAINERS index b00762ba2b6..e4c766a6d28 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -414,6 +414,11 @@ I: iavf M: Damjan Marion F: src/plugins/dev_iavf/ +Plugin - IGE Device driver +I: ige +M: Damjan Marion +F: src/plugins/dev_ige/ + Plugin - Amazon Elastic Network Adapter (ENA) device driver I: ena M: Damjan Marion diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index cd9d75089cf..4e22435bafa 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -251,6 +251,7 @@ det dev devbind dev_iavf +dev_ige dev_octeon df dhcp @@ -486,6 +487,7 @@ ietf iface ifndef igb +ige igmp ikev Ikev diff --git a/src/plugins/dev_ige/CMakeLists.txt b/src/plugins/dev_ige/CMakeLists.txt new file mode 100644 index 00000000000..fc9d033b8f6 --- /dev/null +++ b/src/plugins/dev_ige/CMakeLists.txt @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2024 Cisco Systems, Inc. 
+ +add_vpp_plugin(dev_ige + SOURCES + counters.c + ige.c + format.c + phy.c + port.c + queue.c + reg.c + rx_node.c + tx_node.c + + MULTIARCH_SOURCES + rx_node.c + tx_node.c +) + diff --git a/src/plugins/dev_ige/counters.c b/src/plugins/dev_ige/counters.c new file mode 100644 index 00000000000..5510b381b0c --- /dev/null +++ b/src/plugins/dev_ige/counters.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include +#include +#include + +#define _(hi, lo) ((u64) hi << 32 | lo) +static vnet_dev_counter_t ige_port_counters[] = { + VNET_DEV_CTR_RX_BYTES (_ (0x40c4, 0x40c0)), + VNET_DEV_CTR_TX_BYTES (_ (0x40cc, 0x40c8)), + VNET_DEV_CTR_RX_PACKETS (0x40d0), + VNET_DEV_CTR_TX_PACKETS (0x40d4), + VNET_DEV_CTR_VENDOR (_ (0x408c, 0x4088), RX, BYTES, "good"), + VNET_DEV_CTR_VENDOR (_ (0x4094, 0x4090), TX, BYTES, "good"), + VNET_DEV_CTR_VENDOR (_ (0x412c, 0x4128), RX, BYTES, "host good"), + VNET_DEV_CTR_VENDOR (_ (0x4134, 0x4130), TX, BYTES, "host good"), + VNET_DEV_CTR_VENDOR (0x4104, RX, PACKETS, "host"), + VNET_DEV_CTR_VENDOR (0x4000, RX, PACKETS, "CRC error"), + VNET_DEV_CTR_VENDOR (0x4010, RX, PACKETS, "missed"), + VNET_DEV_CTR_VENDOR (0x405c, RX, PACKETS, "64 bytes"), + VNET_DEV_CTR_VENDOR (0x4060, RX, PACKETS, "65-127 byte"), + VNET_DEV_CTR_VENDOR (0x4064, RX, PACKETS, "128-255 byte"), + VNET_DEV_CTR_VENDOR (0x4068, RX, PACKETS, "256-511 byte"), + VNET_DEV_CTR_VENDOR (0x406c, RX, PACKETS, "512-1023 byte"), + VNET_DEV_CTR_VENDOR (0x4070, RX, PACKETS, ">=1024 byte"), + VNET_DEV_CTR_VENDOR (0x4074, RX, PACKETS, "good"), + VNET_DEV_CTR_VENDOR (0x4078, RX, PACKETS, "broadcast"), + VNET_DEV_CTR_VENDOR (0x407c, RX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (0x40d8, TX, PACKETS, "64 bytes"), + VNET_DEV_CTR_VENDOR (0x40dc, TX, PACKETS, "65-127 byte"), + VNET_DEV_CTR_VENDOR (0x40e0, TX, PACKETS, "128-255 byte"), + VNET_DEV_CTR_VENDOR (0x40e4, TX, PACKETS, "256-511 byte"), + 
VNET_DEV_CTR_VENDOR (0x40e8, TX, PACKETS, "512-1023 byte"), + VNET_DEV_CTR_VENDOR (0x40ec, TX, PACKETS, ">=1024 byte"), + VNET_DEV_CTR_VENDOR (0x40f0, TX, PACKETS, "multicast"), + VNET_DEV_CTR_VENDOR (0x40f4, TX, PACKETS, "broadcast"), + VNET_DEV_CTR_VENDOR (0x4108, NA, NA, "debug counter 1"), + VNET_DEV_CTR_VENDOR (0x410c, NA, NA, "debug counter 2"), + VNET_DEV_CTR_VENDOR (0x4110, NA, NA, "debug counter 3"), + VNET_DEV_CTR_VENDOR (0x411c, NA, NA, "debug counter 4"), +}; + +vnet_dev_counter_t ige_rxq_counters[] = { + VNET_DEV_CTR_RX_PACKETS (_ (0x100, 0x10010)), + VNET_DEV_CTR_RX_BYTES (_ (0x100, 0x10018)), + VNET_DEV_CTR_RX_DROPS (_ (0x40, 0xc030)), + VNET_DEV_CTR_VENDOR (_ (0x100, 0x10038), RX, PACKETS, "multicast"), +}; + +vnet_dev_counter_t ige_txq_counters[] = { + VNET_DEV_CTR_TX_PACKETS (_ (0x100, 0x10014)), + VNET_DEV_CTR_TX_BYTES (_ (0x100, 0x10034)), + VNET_DEV_CTR_TX_DROPS (_ (0x40, 0xe030)), +}; +#undef _ + +vnet_dev_rv_t +ige_port_counters_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_port_add_counters (vm, port, ige_port_counters, + ARRAY_LEN (ige_port_counters)); + foreach_vnet_dev_port_rx_queue (rxq, port) + vnet_dev_rx_queue_add_counters (vm, rxq, ige_rxq_counters, + ARRAY_LEN (ige_rxq_counters)); + foreach_vnet_dev_port_tx_queue (txq, port) + vnet_dev_tx_queue_add_counters (vm, txq, ige_txq_counters, + ARRAY_LEN (ige_txq_counters)); + return 0; +} + +void +ige_port_counter_poll (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + u32 val; + + foreach_vnet_dev_counter (c, port->counter_main) + { + u64 reg = c->user_data; + u32 hi = 0, lo; + ige_reg_rd (dev, (u32) reg, &lo); + reg >>= 32; + if (reg) + ige_reg_rd (dev, (u32) reg, &hi); + + vnet_dev_counter_value_add (vm, c, (u64) hi << 32 | lo); + } + + foreach_vnet_dev_port_rx_queue (rxq, port) + if (rxq->started) + foreach_vnet_dev_counter (c, rxq->counter_main) + { + u32 reg = (u32) c->user_data + (c->user_data >> 32) * rxq->queue_id; + ige_reg_rd (dev, reg, 
&val); + vnet_dev_counter_value_update (vm, c, val); + } + + foreach_vnet_dev_port_tx_queue (txq, port) + if (txq->started) + foreach_vnet_dev_counter (c, txq->counter_main) + { + u32 reg = (u32) c->user_data + (c->user_data >> 32) * txq->queue_id; + ige_reg_rd (dev, reg, &val); + vnet_dev_counter_value_update (vm, c, val); + } +} diff --git a/src/plugins/dev_ige/format.c b/src/plugins/dev_ige/format.c new file mode 100644 index 00000000000..07913cc51db --- /dev/null +++ b/src/plugins/dev_ige/format.c @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include + +static u8 * +_format_ige_reg (u8 *s, u32 offset, u32 val, int no_zero, u32 mask) +{ + u32 indent = format_get_indent (s); + u32 rv = 0, f, v; + u8 *s2 = 0; + int line = 0; + +#define _(o, rn, m) \ + if (offset == o) \ + { \ + if (line++) \ + s = format (s, "\n%U", format_white_space, indent); \ + vec_reset_length (s2); \ + s2 = format (s2, "[0x%05x] %s:", o, #rn); \ + rv = val; \ + s = format (s, "%-32v = 0x%08x", s2, rv); \ + f = 0; \ + m \ + } + +#define __(l, fn) \ + v = (rv >> f) & pow2_mask (l); \ + if ((pow2_mask (l) << f) & mask) \ + if (v || (!no_zero && #fn[0] != '_')) \ + { \ + vec_reset_length (s2); \ + s = format (s, "\n%U", format_white_space, indent + 2); \ + s2 = format (s2, "[%2u:%2u] %s", f + l - 1, f, #fn); \ + s = format (s, "%-30v = ", s2); \ + if (l < 3) \ + s = format (s, "%u", v); \ + else if (l <= 8) \ + s = format (s, "0x%02x (%u)", v, v); \ + else if (l <= 16) \ + s = format (s, "0x%04x", v); \ + else \ + s = format (s, "0x%08x", v); \ + } \ + f += l; + + foreach_ige_reg; +#undef _ + + vec_free (s2); + + return s; +} + +u8 * +format_ige_reg_read (u8 *s, va_list *args) +{ + u32 offset = va_arg (*args, u32); + u32 val = va_arg (*args, u32); + return _format_ige_reg (s, offset, val, 0, 0xffffffff); +} + +u8 * +format_ige_reg_write (u8 *s, va_list *args) +{ + u32 offset = va_arg (*args, u32); + 
u32 val = va_arg (*args, u32); + return _format_ige_reg (s, offset, val, 1, 0xffffffff); +} + +u8 * +format_ige_reg_diff (u8 *s, va_list *args) +{ + u32 offset = va_arg (*args, u32); + u32 old = va_arg (*args, u32); + u32 new = va_arg (*args, u32); + return _format_ige_reg (s, offset, new, 0, old ^ new); +} + +static u8 * +format_ige_rss_type (u8 *s, va_list *args) +{ + static const char *rss_type_names[] = { + [0x0] = "none", + [0x1] = "HASH_TCP_IPV4", + [0x2] = "HASH_IPV4", + [0x3] = "HASH_TCP_IPV6", + [0x4] = "HASH_IPV6_EX", + [0x5] = "HASH_IPV6", + [0x6] = "HASH_TCP_IPV6_EX", + [0x7] = "HASH_UDP_IPV4", + [0x8] = "HASH_UDP_IPV6", + [0x9] = "HASH_UDP_IPV6_EX", + }; + + u32 rss_type = va_arg (*args, u32); + + if (rss_type < ARRAY_LEN (rss_type_names) && rss_type_names[rss_type]) + return format (s, "%s", rss_type_names[rss_type]); + + return format (s, "0x%x", rss_type); +} + +u8 * +format_ige_port_status (u8 *s, va_list *args) +{ + vnet_dev_format_args_t __clib_unused *a = + va_arg (*args, vnet_dev_format_args_t *); + vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *); + ige_port_t *ip = vnet_dev_get_port_data (port); + ige_device_t *id = vnet_dev_get_data (port->dev); + u32 speed = 0; + if (id->config.supports_2_5g && ip->last_status.speed_2p5) + speed = 2500; + else if (ip->last_status.speed < 3) + speed = (u32[]){ 10, 100, 1000 }[ip->last_status.speed]; + + if (ip->last_status.link_up) + s = format (s, "Link up, speed %u Mbps, duplex %s", speed, + ip->last_status.full_duplex ? "full" : "half"); + else + s = format (s, "Link down"); + return s; +} + +u8 * +format_ige_rx_desc (u8 *s, va_list *args) +{ + const ige_rx_desc_t *d = va_arg (*args, const ige_rx_desc_t *); + u32 indent = format_get_indent (s) + 2; + u32 hdr_len = (d->hdr_len_hi << 10) | d->hdr_len_lo; + +#define _(b) ((b) ? 
'+' : '-') + + /* vlan_tag is printed in hex so the "0x" prefix matches the digits */ + s = format ( + s, "pkt_len %u vlan 0x%x hdr_len %u sph%c rss_type %U rss_hash 0x%08x", + d->pkt_len, d->vlan_tag, hdr_len, _ (d->sph), format_ige_rss_type, + d->rss_type, d->rss_hash); + s = format (s, + "\n%Upacket_type: ip4%c ip4e%c ip6%c ip6e%c tcp%c udp%c sctp%c " + "nfs%c etqf %u l2pkt%c vpkt%c", + format_white_space, indent, _ (d->ipv4), _ (d->ipv4e), + _ (d->ipv6), _ (d->ipv6e), _ (d->tcp), _ (d->udp), _ (d->sctp), + _ (d->nfs), d->etqf, _ (d->l2pkt), _ (d->vpkt)); + + s = format (s, "\n%Uext_status: dd%c eop%c", format_white_space, indent, + _ (d->dd), _ (d->eop)); + + if (d->eop) + { + s = format (s, " vp%c udpcs%c l4i%c ipcs%c pif%c", _ (d->vp), + _ (d->udpcs), _ (d->l4i), _ (d->ipcs), _ (d->pif)); + s = format (s, + " vext%c udpv%c llint%c strip_crc%c smd_type %u tsip%c mc%c", + _ (d->vext), _ (d->udpv), _ (d->llint), _ (d->strip_crc), + (u32) d->smd_type, _ (d->tsip), _ (d->mc)); + } + + s = format (s, "\n%Uext_error: l4e%c ipe%c rxe%c", format_white_space, + indent, _ (d->l4e), _ (d->ipe), _ (d->rxe)); + if (d->sph) + s = format (s, " hbo%c", _ (d->hbo)); + +#undef _ + + return s; +} + +u8 * +format_ige_rx_trace (u8 *s, va_list *args) +{ + vlib_main_t *vm = va_arg (*args, vlib_main_t *); + vlib_node_t *node = va_arg (*args, vlib_node_t *); + ige_rx_trace_t *t = va_arg (*args, ige_rx_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); + u32 indent = format_get_indent (s); + + s = format (s, "ige: %v (%u) qid %u next-node %U buffer %u", hi->name, + t->hw_if_index, t->queue_id, format_vlib_next_node_name, vm, + node->index, t->next_index, t->buffer_index); + + s = format (s, "\n%Udesc: %U", format_white_space, indent + 2, + format_ige_rx_desc, &t->desc); + + return s; +} + +u8 * +format_ige_tx_desc (u8 *s, va_list *args) +{ + const ige_tx_desc_t *d = va_arg (*args, const ige_tx_desc_t *); + u32 indent = format_get_indent (s) + 2; + +#define _(b) ((b) ? 
'+' : '-') + + s = format ( + s, + "addr 0x%016llx dtalen %u paylen %u dtyp 0x%x ptp1 %u ptp2 %u popts 0x%x", + d->addr, d->dtalen, d->paylen, d->dtyp, d->ptp1, d->ptp2, d->popts); + + s = format (s, "\n%Uflags: eop%c ifcs%c rs%c dext%c vle%c tse%c idx%c", + format_white_space, indent, _ (d->eop), _ (d->ifcs), _ (d->rs), + _ (d->dext), _ (d->vle), _ (d->tse), _ (d->idx)); + + s = format (s, "\n%Ustatus: dd%c ts_stat%c sta 0x%x", format_white_space, + indent, _ (d->dd), _ (d->ts_stat), d->sta); + +#undef _ + + return s; +} + +u8 * +format_ige_tx_trace (u8 *s, va_list *args) +{ + vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *); + vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *); + ige_tx_trace_t *t = va_arg (*args, ige_tx_trace_t *); + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index); + u32 indent = format_get_indent (s); + + s = format (s, "ige-tx: %v (%u) qid %u buffer %u", hi->name, t->hw_if_index, + t->queue_id, t->buffer_index); + + s = format (s, "\n%Udesc: %U", format_white_space, indent + 2, + format_ige_tx_desc, &t->desc); + + return s; +} +u8 * +format_ige_receive_addr_table (u8 *s, va_list *args) +{ + vnet_dev_t *dev = va_arg (*args, vnet_dev_t *); + u32 indent = format_get_indent (s); + + for (int i = 0; i < 16; i++) + { + ige_receive_addr_t ra; + ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah); + ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral); + if (ra.av) + { + if (i) + s = format (s, "\n%U", format_white_space, indent); + s = format (s, "[%u] %U asel %u qsel %u qsel_enable %u av %u", i, + format_ethernet_address, ra.hw_addr, ra.asel, ra.qsel, + ra.qsel_enable, ra.av); + } + } + + return s; +} diff --git a/src/plugins/dev_ige/ige.c b/src/plugins/dev_ige/ige.c new file mode 100644 index 00000000000..af9cb35e8a2 --- /dev/null +++ b/src/plugins/dev_ige/ige.c @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include 
+#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ige_log, static) = { + .class_name = "ige", + .subclass_name = "init", +}; + +#define _(f, n, s, d) \ + { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s }, + +vlib_error_desc_t ige_tx_node_counters[] = { foreach_ige_tx_node_counter }; +#undef _ + +vnet_dev_node_t ige_rx_node = { + .format_trace = format_ige_rx_trace, +}; + +vnet_dev_node_t ige_tx_node = { + .error_counters = ige_tx_node_counters, + .n_error_counters = ARRAY_LEN (ige_tx_node_counters), + .format_trace = format_ige_tx_trace, +}; + +static ige_dev_config_t config_by_type[] = { + [IGE_DEV_TYPE_I211] = { .phy_type = IGE_PHY_TYPE_I210_INTERNAL }, + [IGE_DEV_TYPE_I225] = { .phy_type = IGE_PHY_TYPE_GPY211, + .supports_2_5g = 1 }, + [IGE_DEV_TYPE_I226] = { .phy_type = IGE_PHY_TYPE_GPY211, + .supports_2_5g = 1 }, +}; + +static struct +{ + u16 device_id; + ige_dev_type_t type; + char *description; +} ige_dev_types[] = { + +#define _(id, t, desc) \ + { \ + .device_id = (id), .type = IGE_DEV_TYPE_##t, .description = (desc) \ + } + + _ (0x1539, I211, "Intel(R) Ethernet Controller I211"), + _ (0x15f2, I225, "Intel(R) Ethernet Controller I225-LM"), + _ (0x15f3, I225, "Intel(R) Ethernet Controller I225-V"), + _ (0x0d9f, I225, "Intel(R) Ethernet Controller I225-IT"), + _ (0x125b, I226, "Intel(R) Ethernet Controller I226-LM"), + _ (0x125c, I226, "Intel(R) Ethernet Controller I226-V"), + _ (0x125d, I226, "Intel(R) Ethernet Controller I226-IT"), +#undef _ +}; + +static u8 * +ige_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info) +{ + vnet_dev_bus_pci_device_info_t *di = dev_info; + + if (di->vendor_id != 0x8086) + return 0; + + FOREACH_ARRAY_ELT (dt, ige_dev_types) + { + if (dt->device_id == di->device_id) + return format (0, "%s", dt->description); + } + + return 0; +} + +static vnet_dev_rv_t +ige_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_device_t *id = vnet_dev_get_data (dev); + 
vlib_pci_config_hdr_t pci_hdr; + vnet_dev_rv_t rv; + u32 match, mask, tmp; + + rv = vnet_dev_pci_read_config_header (vm, dev, &pci_hdr); + if (rv != VNET_DEV_OK) + return rv; + + if (pci_hdr.vendor_id != 0x8086) + return VNET_DEV_ERR_UNSUPPORTED_DEVICE; + + rv = VNET_DEV_ERR_UNSUPPORTED_DEVICE; + + FOREACH_ARRAY_ELT (dt, ige_dev_types) + if (dt->device_id == pci_hdr.device_id) + { + id->config = config_by_type[dt->type]; + rv = VNET_DEV_OK; + break; + } + + if (rv != VNET_DEV_OK) + return rv; + + /* map BAR0 */ + if (id->bar0 == 0) + { + rv = vnet_dev_pci_map_region (vm, dev, 0, &id->bar0); + if (rv != VNET_DEV_OK) + return rv; + } + + /* disable interrupts */ + ige_reg_wr (dev, IGE_REG_IMC, 0xffffffff); + ige_reg_rd (dev, IGE_REG_ICR, &tmp); + + rv = vnet_dev_pci_function_level_reset (vm, dev); + if (rv != VNET_DEV_OK) + return rv; + + rv = vnet_dev_pci_bus_master_enable (vm, dev); + if (rv != VNET_DEV_OK) + return rv; + + mask = (ige_reg_status_t){ .rst_done = 1 }.as_u32; + match = mask; + + if (ige_reg_poll (vm, dev, IGE_REG_STATUS, mask, match, 1e-5, 1e-1) == 0) + { + log_err (dev, "reset timeout"); + return VNET_DEV_ERR_TIMEOUT; + } + + /* disable interrupts again */ + ige_reg_wr (dev, IGE_REG_IMC, 0xffffffff); + ige_reg_rd (dev, IGE_REG_ICR, &tmp); + + /* notify ME that driver is loaded */ + ige_reg_ctrl_ext_t ctrl_ext; + ige_reg_rd (dev, IGE_REG_CTRL_EXT, &ctrl_ext.as_u32); + ctrl_ext.driver_loaded = 1; + ige_reg_wr (dev, IGE_REG_CTRL_EXT, ctrl_ext.as_u32); + + rv = ige_phy_init (vm, dev); + + if (rv != VNET_DEV_OK) + { + log_err (dev, "failed to read PHY ID"); + return rv; + } + + vnet_dev_port_add_args_t port = { + .port = { + .attr = { + .type = VNET_DEV_PORT_TYPE_ETHERNET, + .max_rx_queues = 4, + .max_tx_queues = 4, + .max_supported_rx_frame_size = 9728, + }, + .ops = { + .init = ige_port_init, + .start = ige_port_start, + .stop = ige_port_stop, + .format_status = format_ige_port_status, + .config_change = ige_port_cfg_change, + .config_change_validate = 
ige_port_cfg_change_validate, + }, + .data_size = sizeof (ige_port_t), + }, + .rx_node = &ige_rx_node, + .tx_node = &ige_tx_node, + .rx_queue = { + .config = { + .data_size = sizeof (ige_rxq_t), + .default_size = 512, + .size_is_power_of_two = 1, + .min_size = 512, + .max_size = 32768, + }, + .ops = { + .alloc = ige_rx_queue_alloc, + .free = ige_rx_queue_free, + }, + }, + .tx_queue = { + .config = { + .data_size = sizeof (ige_txq_t), + .default_size = 512, + .size_is_power_of_two = 1, + .min_size = 512, + .max_size = 32768, + }, + .ops = { + .alloc = ige_tx_queue_alloc, + .free = ige_tx_queue_free, + }, + }, + }; + + ige_reg_rd (dev, IGE_REG_RAL0, &tmp); + clib_memcpy (&port.port.attr.hw_addr.eth_mac[0], &tmp, 4); + ige_reg_rd (dev, IGE_REG_RAH0, &tmp); + clib_memcpy (&port.port.attr.hw_addr.eth_mac[4], &tmp, 2); + log_info (dev, "MAC address is %U", format_ethernet_address, + port.port.attr.hw_addr.eth_mac); + + id->avail_rxq_bmp = pow2_mask (4); + id->avail_txq_bmp = pow2_mask (4); + return vnet_dev_port_add (vm, dev, 0, &port); +} + +VNET_DEV_REGISTER_DRIVER (ige) = { + .name = "ige", + .bus = "pci", + .device_data_sz = sizeof (ige_device_t), + .ops = { + .init = ige_init, + .probe = ige_probe, + }, +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "dev_ige", +}; diff --git a/src/plugins/dev_ige/ige.h b/src/plugins/dev_ige/ige.h new file mode 100644 index 00000000000..444fbbca6ad --- /dev/null +++ b/src/plugins/dev_ige/ige.h @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +typedef union +{ + struct + { + u64 pkt_addr; + u64 hdr_addr; + }; + struct + { + u64 rss_type : 4; + + /* packet type */ + u64 ipv4 : 1; + u64 ipv4e : 1; + u64 ipv6 : 1; + u64 ipv6e : 1; + u64 tcp : 1; + u64 udp : 1; + u64 sctp : 1; + u64 nfs : 1; + u64 etqf : 3; + u64 l2pkt : 1; + u64 vpkt : 1; + + u64 _reserved_17 : 2; + u64 
hdr_len_hi : 2; + u64 hdr_len_lo : 10; + u64 sph : 1; + u64 rss_hash : 32; + + /* ext status */ + u64 dd : 1; + u64 eop : 1; + u64 _rsv1 : 1; + u64 vp : 1; + u64 udpcs : 1; + u64 l4i : 1; + u64 ipcs : 1; + u64 pif : 1; + u64 _rsv2 : 1; + u64 vext : 1; + u64 udpv : 1; + u64 llint : 1; + u64 strip_crc : 1; + u64 smd_type : 2; + u64 tsip : 1; + u64 _rsv3 : 3; + u64 mc : 1; + + /* ext error */ + u64 _rsv4 : 3; + u64 hbo : 1; + u64 _rsv5 : 5; + u64 l4e : 1; + u64 ipe : 1; + u64 rxe : 1; + + u64 pkt_len : 16; + u64 vlan_tag : 16; + }; +} ige_rx_desc_t; + +STATIC_ASSERT_SIZEOF (ige_rx_desc_t, 16); + +typedef union +{ + u64 qwords[2]; + struct + { + u64 addr; + u64 dtalen : 16; + u64 ptp1 : 4; + u64 dtyp : 4; + + u64 eop : 1; + u64 ifcs : 1; + u64 _reserved_26 : 1; + u64 rs : 1; + u64 _reserved_28 : 1; + u64 dext : 1; + u64 vle : 1; + u64 tse : 1; + + /* status */ + u64 dd : 1; + u64 ts_stat : 1; + u64 _reserved_35_36 : 2; + + u64 idx : 1; + u64 ptp2 : 3; + u64 popts : 6; + u64 paylen : 18; + }; + + /* writeback */ + struct + { + u64 dma_timestamp; + u64 _reserved_64_95 : 32; + u64 sta : 4; + u64 _reserved_100_127 : 28; + }; +} ige_tx_desc_t; + +STATIC_ASSERT_SIZEOF (ige_tx_desc_t, 16); + +typedef enum +{ + IGE_PHY_TYPE_UNKNOWN = 0, + IGE_PHY_TYPE_I210_INTERNAL, + IGE_PHY_TYPE_GPY211, +} __clib_packed ige_phy_type_t; + +typedef enum +{ + IGE_DEV_TYPE_I211, + IGE_DEV_TYPE_I225, + IGE_DEV_TYPE_I226, +} __clib_packed ige_dev_type_t; + +typedef struct +{ + ige_phy_type_t phy_type; + u8 supports_2_5g : 1; +} ige_dev_config_t; + +typedef struct +{ + void *bar0; + u8 avail_rxq_bmp; + u8 avail_txq_bmp; + ige_phy_type_t phy_type; + ige_dev_config_t config; +} ige_device_t; + +typedef struct +{ + ige_reg_status_t last_status; +} ige_port_t; + +typedef struct +{ + u32 *buffer_indices; + ige_rx_desc_t *descs; + u16 head; + u16 tail; + u32 *reg_rdt; +} ige_rxq_t; + +typedef struct +{ + u32 *buffer_indices; + ige_tx_desc_t *descs; + u16 head; + u16 tail; + u32 *reg_tdt; + u32 *wb; +} 
ige_txq_t; + +typedef struct +{ + ige_rx_desc_t desc; + u32 buffer_index; + u32 hw_if_index; + u16 queue_id; + u16 next_index; +} ige_rx_trace_t; + +typedef struct +{ + ige_tx_desc_t desc; + u32 buffer_index; + u32 hw_if_index; + u16 queue_id; +} ige_tx_trace_t; + +/* counters.c */ +vnet_dev_rv_t ige_port_counters_init (vlib_main_t *, vnet_dev_port_t *); +void ige_port_counter_poll (vlib_main_t *, vnet_dev_port_t *); + +/* format.c */ +format_function_t format_ige_reg_write; +format_function_t format_ige_reg_read; +format_function_t format_ige_reg_diff; +format_function_t format_ige_port_status; +format_function_t format_ige_rx_desc; +format_function_t format_ige_rx_trace; +format_function_t format_ige_tx_desc; +format_function_t format_ige_tx_trace; +format_function_t format_ige_receive_addr_table; + +/* phy.c */ +vnet_dev_rv_t ige_phy_init (vlib_main_t *, vnet_dev_t *); + +/* port.c */ +vnet_dev_rv_t ige_port_init (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t ige_port_start (vlib_main_t *, vnet_dev_port_t *); +void ige_port_stop (vlib_main_t *, vnet_dev_port_t *); +vnet_dev_rv_t ige_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); +vnet_dev_rv_t ige_port_cfg_change (vlib_main_t *, vnet_dev_port_t *, + vnet_dev_port_cfg_change_req_t *); + +/* queue.c */ +vnet_dev_rv_t ige_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *); +vnet_dev_rv_t ige_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *); +void ige_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *); +void ige_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *); + +static_always_inline u16 +ige_rxq_refill_no_wrap (vlib_main_t *vm, u32 *buffer_indices, + ige_rx_desc_t *descs, u16 n_refill, + u8 buffer_pool_index, int use_va_dma) +{ + u16 n_alloc; + vlib_buffer_t *b; + + n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, n_refill, + buffer_pool_index); + + if (use_va_dma) + for (u32 i = 0; i < n_alloc; i++) + { + b = vlib_get_buffer (vm, 
buffer_indices[i]); + descs[i].pkt_addr = vlib_buffer_get_va (b); + descs[i].hdr_addr = 0; + } + else + for (u32 i = 0; i < n_alloc; i++) + { + b = vlib_get_buffer (vm, buffer_indices[i]); + descs[i].pkt_addr = vlib_buffer_get_pa (vm, b); + descs[i].hdr_addr = 0; + } + + return n_alloc; +} + +/* reg.c */ +vnet_dev_rv_t ige_reg_poll (vlib_main_t *, vnet_dev_t *, u32, u32, u32, f64, + f64); +int ige_reg_sw_fw_sync_acquire (vlib_main_t *, vnet_dev_t *); +void ige_reg_sw_fw_sync_release (vlib_main_t *, vnet_dev_t *); + +/* inlines */ +static_always_inline void +ige_reg_rd (vnet_dev_t *dev, u32 reg, u32 *val) +{ + ige_device_t *id = vnet_dev_get_data (dev); + u32 rv = __atomic_load_n ((u32 *) ((u8 *) id->bar0 + reg), __ATOMIC_ACQUIRE); + *val = rv; +} + +static_always_inline void +ige_reg_wr (vnet_dev_t *dev, u32 reg, u32 val) +{ + ige_device_t *id = vnet_dev_get_data (dev); + __atomic_store_n ((u32 *) ((u8 *) id->bar0 + reg), val, __ATOMIC_RELEASE); +} + +#define log_debug(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, ige_log.class, "%U" f, format_vnet_dev_log, \ + (dev), clib_string_skip_prefix (__func__, "ige_"), ##__VA_ARGS__) +#define log_info(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_INFO, ige_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) +#define log_notice(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_NOTICE, ige_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) +#define log_warn(dev, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_WARNING, ige_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) +#define log_err(dev, f, ...) 
\ + vlib_log (VLIB_LOG_LEVEL_ERR, ige_log.class, "%U: " f, \ + format_vnet_dev_addr, dev, ##__VA_ARGS__) + +#define foreach_ige_tx_node_counter \ + _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \ + _ (BUFFER_CHAIN_TOO_LONG, buffer_chain_too_long, ERROR, \ + "buffer chain too long") + +typedef enum +{ +#define _(f, n, s, d) IGE_TX_NODE_CTR_##f, + foreach_ige_tx_node_counter +#undef _ +} ige_tx_node_counter_t; diff --git a/src/plugins/dev_ige/ige_regs.h b/src/plugins/dev_ige/ige_regs.h new file mode 100644 index 00000000000..85dcc773a52 --- /dev/null +++ b/src/plugins/dev_ige/ige_regs.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#pragma once + +#include +#include +#include +#include +#include + +#define ige_reg_ctrl_t_fields \ + __ (1, full_duplex) \ + __ (1, _reserved1) \ + __ (1, gio_master_disable) \ + __ (3, _reserved3) \ + __ (1, set_link_up) \ + __ (9, _reserved7) \ + __ (1, sdp0_gpien) \ + __ (1, sdp1_gpien) \ + __ (1, sdp0_data) \ + __ (1, sdp1_data) \ + __ (1, adww3wuc) \ + __ (1, sdp0_wde) \ + __ (1, sdp0_iodir) \ + __ (1, sdp1_iodir) \ + __ (2, _reserved24) \ + __ (1, port_sw_reset) \ + __ (1, rx_flow_ctl_en) \ + __ (1, tx_flow_ctl_en) \ + __ (1, device_reset) \ + __ (1, vlan_mode_enable) \ + __ (1, phy_reset) + +#define ige_reg_status_t_fields \ + __ (1, full_duplex) \ + __ (1, link_up) \ + __ (2, _reserved2) \ + __ (1, tx_off) \ + __ (1, _reserved5) \ + __ (2, speed) \ + __ (2, asdv) \ + __ (1, phy_reset_asserted) \ + __ (8, _reserved11) \ + __ (1, gio_master_en_sts) \ + __ (1, dev_rst_set) \ + __ (1, rst_done) \ + __ (1, speed_2p5) \ + __ (7, _reserved23) \ + __ (1, lpi_ignore) \ + __ (1, _reserved31) + +#define ige_reg_ctrl_ext_t_fields \ + __ (2, _reserved0) \ + __ (1, sdp2_gpien) \ + __ (1, sdp3_gpien) \ + __ (2, _reserved4) \ + __ (1, sdp2_data) \ + __ (1, sdp3_data) \ + __ (2, _reserved8) \ + __ (1, sdp2_iodir) \ + __ (1, sdp3_iodir) \ + __ (1, _reserved12) \ + __ (1, 
eeprom_block_rst) \ + __ (2, _reserved14) \ + __ (1, no_snoop_dis) \ + __ (1, relaxed_ordering_dis) \ + __ (2, _reserved18) \ + __ (1, phy_power_down_ena) \ + __ (5, _reserved21) \ + __ (1, ext_vlan_ena) \ + __ (1, _reserved27) \ + __ (1, driver_loaded) \ + __ (3, _reserved29) + +#define ige_reg_mdic_t_fields \ + __ (16, data) \ + __ (5, regadd) \ + __ (5, _reserved21) \ + __ (2, opcode) \ + __ (1, ready) \ + __ (1, mid_ie) \ + __ (1, mid_err) \ + __ (1, _reserved31) + +#define ige_reg_rctl_t_fields \ + __ (1, _reserved0) \ + __ (1, rx_enable) \ + __ (1, store_bad_packets) \ + __ (1, uc_promisc_ena) \ + __ (1, mc_promisc_ena) \ + __ (1, long_pkt_reception_ena) \ + __ (2, loopback_mode) \ + __ (2, hash_select) \ + __ (2, _reserved10) \ + __ (2, mc_uc_tbl_off) \ + __ (1, _reserved14) \ + __ (1, bcast_accept_mode) \ + __ (2, rx_buf_sz) \ + __ (1, vlan_filter_ena) \ + __ (1, cannonical_form_ind_ena) \ + __ (1, cannonical_form_ind_bit_val) \ + __ (1, pad_small_rx_pkts) \ + __ (1, discard_pause_frames) \ + __ (1, pass_mac_ctrl_frames) \ + __ (2, _reserved24) \ + __ (1, strip_eth_crc) \ + __ (5, _reserved27) + +/* Reserved fields are named _reservedN (N = first bit); _format_ige_reg prints '_'-prefixed fields only when they are non-zero. */ +#define ige_reg_tctl_t_fields \ + __ (1, _reserved0) \ + __ (1, tx_enable) \ + __ (1, _reserved2) \ + __ (1, pad_short_pkts) \ + __ (8, collision_threshold) \ + __ (10, backoff_slot_time) \ + __ (1, sw_xoff_tx) \ + __ (1, _reserved23) \ + __ (1, retransmit_on_late_colision) \ + __ (7, _reserved25) + +#define ige_reg_txdctl_t_fields \ + __ (5, pthresh) \ + __ (3, _reserved5) \ + __ (5, hthresh) \ + __ (3, _reserved13) \ + __ (5, wthresh) \ + __ (4, _reserved21) \ + __ (1, enable) \ + __ (1, sw_flush) \ + __ (1, priority) \ + __ (4, hwbthresh) + +#define ige_reg_phpm_t_fields \ + __ (1, _reserved0) \ + __ (1, restart_autoneg) \ + __ (1, _reserved2) \ + __ (1, dis_1000_in_non_d0a) \ + __ (1, link_energy_detect) \ + __ (1, go_link_disc) \ + __ (1, disable_1000) \ + __ (1, spd_b2b_en) \ + __ (1, rst_compl) \ + __ (1, dis_100_in_non_d0a) \ + __ (1, ulp_req) \ + __ (1, 
disable_2500) \ + __ (1, dis_2500_in_non_d0a) \ + __ (1, ulp_trig) \ + __ (2, ulp_delay) \ + __ (1, link_enery_en) \ + __ (1, dev_off_en) \ + __ (1, dev_off_state) \ + __ (1, ulp_en) \ + __ (12, _reserved20) + +#define ige_reg_manc_t_fields \ + __ (1, flow_ctrl_discard) \ + __ (1, ncsi_discard) \ + __ (12, _reserved2) \ + __ (1, fw_reset) \ + __ (1, tco_isolate) \ + __ (1, tco_reset) \ + __ (1, rcv_tco_en) \ + __ (1, keep_phy_link_up) \ + __ (1, rcv_all) \ + __ (1, inhibit_ulp) \ + __ (2, _reserved21) \ + __ (1, en_xsum_filter) \ + __ (1, en_ipv4_filter) \ + __ (1, fixed_net_type) \ + __ (1, net_type) \ + __ (1, ipv6_adv_only) \ + __ (1, en_bmc2os) \ + __ (1, en_bmc2net) \ + __ (1, mproxye) \ + __ (1, mproxya) + +#define ige_reg_swsm_t_fields \ + __ (1, smbi) \ + __ (1, swesmbi) \ + __ (30, _reserved2) + +#define ige_reg_fwsm_t_fields \ + __ (1, eep_fw_semaphore) \ + __ (3, fw_mode) \ + __ (2, _reserved4) \ + __ (1, eep_reload_ind) \ + __ (8, _reserved7) \ + __ (1, fw_val_bit) \ + __ (3, reset_ctr) \ + __ (6, ext_err_ind) \ + __ (1, pcie_config_err_ind) \ + __ (5, _reserved26) \ + __ (1, factory_mac_addr_restored) + +#define ige_reg_sw_fw_sync_t_fields \ + __ (1, sw_flash_sm) \ + __ (1, sw_phy_sm) \ + __ (1, sw_i2c_sm) \ + __ (1, sw_mac_csr_sm) \ + __ (3, _reserved4) \ + __ (1, sw_svr_sm) \ + __ (1, sw_mb_sm) \ + __ (1, _reserved9) \ + __ (1, sw_mng_sm) \ + __ (5, _reserved11) \ + __ (1, fw_flash_sm) \ + __ (1, fw_phy_sm) \ + __ (1, fw_i2c_sm) \ + __ (1, fw_mac_csr_sm) \ + __ (3, _reserved20) \ + __ (1, fw_svr_sm) \ + __ (8, _reserved24) + +#define ige_reg_srrctl_t_fields \ + __ (7, bsizepacket) \ + __ (1, _reserved7) \ + __ (6, bsizeheader) \ + __ (2, timer1_sel) \ + __ (1, _reserved16) \ + __ (2, timer0_sel) \ + __ (1, use_domain) \ + __ (5, rdmts) \ + __ (3, desc_type) \ + __ (2, _reserved28) \ + __ (1, timestamp) \ + __ (1, drop_en) + +#define ige_reg_rxdctl_t_fields \ + __ (5, pthresh) \ + __ (3, _reserved5) \ + __ (5, hthresh) \ + __ (3, _reserved13) \ + __ 
(5, wthresh) \ + __ (4, _reserved21) \ + __ (1, enable) \ + __ (1, swflush) \ + __ (5, _reserved27) + +#define ige_reg_txctl_t_fields \ + __ (1, tx_desc_fetch_tph_en) \ + __ (1, tx_desc_wb_tph_en) \ + __ (1, _reserved2) \ + __ (1, tx_packet_tph_en) \ + __ (1, _reserved4) \ + __ (1, tx_desc_dca_en) \ + __ (2, _reserved6) \ + __ (1, tx_desc_read_no_snoop_en) \ + __ (1, tx_desc_read_relax_order_en) \ + __ (1, tx_desc_wb_no_snoop_en) \ + __ (1, tx_desc_wb_relax_order_en) \ + __ (1, tx_data_no_snoop_en) \ + __ (1, tx_data_relax_order_en) \ + __ (18, _reserved14) + +#define ige_reg_eec_t_fields \ + __ (6, _reserved0) \ + __ (1, flash_in_use) \ + __ (1, _reserved7) \ + __ (1, ee_pres) \ + __ (1, auto_rd) \ + __ (1, _reserved10) \ + __ (4, ee_size) \ + __ (4, pci_ana_done) \ + __ (1, flash_detected) \ + __ (2, _reserved20) \ + __ (1, shadow_modified) \ + __ (1, flupd) \ + __ (1, _reserved24) \ + __ (1, sec1val) \ + __ (1, fludone) \ + __ (5, _reserved27) + +/* Reserved fields carry a leading '_' so _format_ige_reg prints them only when they are non-zero. */ +#define ige_reg_eemngctl_t_fields \ + __ (11, addr) \ + __ (4, _reserved11) \ + __ (1, cmd_valid) \ + __ (1, write) \ + __ (1, eebusy) \ + __ (1, cfg_done) \ + __ (12, _reserved19) \ + __ (1, done) + +#define IGE_REG_STRUCT(n) \ + typedef union \ + { \ + struct \ + { \ + n##_fields; \ + }; \ + u32 as_u32; \ + } n; \ + STATIC_ASSERT_SIZEOF (n, 4); + +#define __(n, f) u32 f : n; +IGE_REG_STRUCT (ige_reg_status_t); +IGE_REG_STRUCT (ige_reg_ctrl_t); +IGE_REG_STRUCT (ige_reg_ctrl_ext_t); +IGE_REG_STRUCT (ige_reg_mdic_t); +IGE_REG_STRUCT (ige_reg_rctl_t); +IGE_REG_STRUCT (ige_reg_tctl_t); +IGE_REG_STRUCT (ige_reg_txdctl_t); +IGE_REG_STRUCT (ige_reg_txctl_t); +IGE_REG_STRUCT (ige_reg_phpm_t); +IGE_REG_STRUCT (ige_reg_manc_t); +IGE_REG_STRUCT (ige_reg_swsm_t); +IGE_REG_STRUCT (ige_reg_fwsm_t); +IGE_REG_STRUCT (ige_reg_sw_fw_sync_t); +IGE_REG_STRUCT (ige_reg_srrctl_t); +IGE_REG_STRUCT (ige_reg_rxdctl_t); +IGE_REG_STRUCT (ige_reg_eec_t); +IGE_REG_STRUCT (ige_reg_eemngctl_t); +#undef __ + +#define foreach_ige_reg \ + _ (0x00000, 
CTRL, ige_reg_ctrl_t_fields) \ + _ (0x00008, STATUS, ige_reg_status_t_fields) \ + _ (0x00018, CTRL_EXT, ige_reg_ctrl_ext_t_fields) \ + _ (0x00020, MDIC, ige_reg_mdic_t_fields) \ + _ (0x00100, RCTL, ige_reg_rctl_t_fields) \ + _ (0x00400, TCTL, ige_reg_tctl_t_fields) \ + _ (0x00404, TCTL_EXT, ) \ + _ (0x00e14, PHPM, ige_reg_phpm_t_fields) \ + _ (0x01500, ICR, ) \ + _ (0x0150c, IMC, ) \ + _ (0x05004, RLPML, ) \ + _ (0x05400, RAL0, ) \ + _ (0x05404, RAH0, ) \ + _ (0x05820, MANC, ige_reg_manc_t_fields) \ + _ (0x05b50, SWSM, ige_reg_swsm_t_fields) \ + _ (0x05b54, FWSM, ige_reg_fwsm_t_fields) \ + _ (0x05b5c, SW_FW_SYNC, ige_reg_sw_fw_sync_t_fields) \ + _ (0x0c000, RDBAL0, ) \ + _ (0x0c004, RDBAH0, ) \ + _ (0x0c008, RDLEN0, ) \ + _ (0x0c00c, SRRCTL0, ige_reg_srrctl_t_fields) \ + _ (0x0c010, RDH0, ) \ + _ (0x0c018, RDT0, ) \ + _ (0x0c028, RXDCTL0, ige_reg_rxdctl_t_fields) \ + _ (0x0e000, TDBAL0, ) \ + _ (0x0e004, TDBAH0, ) \ + _ (0x0e008, TDLEN0, ) \ + _ (0x0e010, TDH0, ) \ + _ (0x0e014, TXCTL0, ige_reg_txctl_t_fields) \ + _ (0x0e018, TDT0, ) \ + _ (0x0e038, TDWBAL0, ) \ + _ (0x0e03c, TDWBAH0, ) \ + _ (0x0e028, TXDCTL0, ige_reg_txdctl_t_fields) \ + _ (0x12010, EEC, ige_reg_eec_t_fields) \ + _ (0x12030, EEMNGCTL, ige_reg_eemngctl_t_fields) + +#define IGE_REG_RDBAL(n) (IGE_REG_RDBAL0 + (n) *0x40) +#define IGE_REG_RDBAH(n) (IGE_REG_RDBAH0 + (n) *0x40) +#define IGE_REG_RDLEN(n) (IGE_REG_RDLEN0 + (n) *0x40) +#define IGE_REG_SRRCTL(n) (IGE_REG_SRRCTL0 + (n) *0x40) +#define IGE_REG_RDH(n) (IGE_REG_RDH0 + (n) *0x40) +#define IGE_REG_RDT(n) (IGE_REG_RDT0 + (n) *0x40) +#define IGE_REG_RXDCTL(n) (IGE_REG_RXDCTL0 + (n) *0x40) +#define IGE_REG_TDBAL(n) (IGE_REG_TDBAL0 + (n) *0x40) +#define IGE_REG_TDBAH(n) (IGE_REG_TDBAH0 + (n) *0x40) +#define IGE_REG_TDLEN(n) (IGE_REG_TDLEN0 + (n) *0x40) +#define IGE_REG_TDH(n) (IGE_REG_TDH0 + (n) *0x40) +#define IGE_REG_TDT(n) (IGE_REG_TDT0 + (n) *0x40) +#define IGE_REG_TDWBAL(n) (IGE_REG_TDWBAL0 + (n) *0x40) +#define IGE_REG_TDWBAH(n) 
(IGE_REG_TDWBAH0 + (n) *0x40) +#define IGE_REG_TXDCTL(n) (IGE_REG_TXDCTL0 + (n) *0x40) +#define IGE_REG_TXCTL(n) (IGE_REG_TXCTL0 + (n) *0x40) +#define IGE_REG_RAL(n) (IGE_REG_RAL0 + (n) *0x08) +#define IGE_REG_RAH(n) (IGE_REG_RAH0 + (n) *0x08) + +#define IGE_TDWBAL_HEAD_WB_ENABLE 0x1 + +typedef enum +{ +#define _(o, n, f) IGE_REG_##n = (o), + foreach_ige_reg +#undef _ +} ige_reg_t; + +typedef union +{ + struct + { + u32 ral; + u32 rah; + }; + struct + { + u8 hw_addr[6]; + u16 asel : 2; + u16 qsel : 2; + u16 _reserved20 : 8; + u16 qsel_enable : 1; + u16 _reserved30 : 2; + u16 av : 1; + }; +} ige_receive_addr_t; + +STATIC_ASSERT_SIZEOF (ige_receive_addr_t, 8); diff --git a/src/plugins/dev_ige/phy.c b/src/plugins/dev_ige/phy.c new file mode 100644 index 00000000000..fc0e5d0a742 --- /dev/null +++ b/src/plugins/dev_ige/phy.c @@ -0,0 +1,311 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ige_log, static) = { + .class_name = "ige", + .subclass_name = "phy", +}; + +#define foreach_ige_phy_reg \ + _ (0x00, CTRL) \ + _ (0x01, STAT) \ + _ (0x02, PHYID1) \ + _ (0x03, PHYID2) \ + _ (0x04, AN_ADV) \ + _ (0x09, GCTRL) \ + _ (0x0a, GSTAT) \ + _ (0x0d, MMDCTRL) \ + _ (0x0e, MMDDATA) \ + _ (0x0f, XSTAT) + +typedef enum +{ +#define _(n, v) IGE_PHY_REG_##v = (n), + foreach_ige_phy_reg +#undef _ +} ige_phy_reg_t; + +static char *phy_reg_names[] = { +#define _(n, v) [n] = #v, + foreach_ige_phy_reg +#undef _ +}; + +static vnet_dev_rv_t +ige_phy_acquire (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_reg_sw_fw_sync_t sw_fw_sync; + int n_tries = 5; + + log_debug (dev, "phy_acquire:"); + + while (n_tries-- > 0) + { + if (ige_reg_sw_fw_sync_acquire (vm, dev)) + { + ige_reg_rd (dev, IGE_REG_SW_FW_SYNC, &sw_fw_sync.as_u32); + log_debug (dev, "phy_acquire: sw_fw_sync 0x%04x"); + + if (sw_fw_sync.fw_phy_sm == 0) + { + sw_fw_sync.sw_phy_sm = 1; + ige_reg_wr (dev, 
IGE_REG_SW_FW_SYNC, sw_fw_sync.as_u32); + ige_reg_sw_fw_sync_release (vm, dev); + return 0; + } + + ige_reg_sw_fw_sync_release (vm, dev); + } + vlib_process_suspend (vm, 1e-4); + } + + log_err (dev, "failed to acquire PHY"); + return VNET_DEV_ERR_TIMEOUT; +} + +static vnet_dev_rv_t +ige_phy_release (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_reg_sw_fw_sync_t sw_fw_sync; + + log_debug (dev, "phy_release:"); + + /* release phy */ + if (ige_reg_sw_fw_sync_acquire (vm, dev) == 0) + { + log_err (dev, "sw_fw_sync ownership timeout"); + return VNET_DEV_ERR_TIMEOUT; + } + + sw_fw_sync.sw_phy_sm = 0; + ige_reg_wr (dev, IGE_REG_SW_FW_SYNC, sw_fw_sync.as_u32); + ige_reg_sw_fw_sync_release (vm, dev); + + return 0; +} + +static vnet_dev_rv_t +ige_phy_read (vlib_main_t *vm, vnet_dev_t *dev, u16 addr, u16 *data) +{ + ige_reg_mdic_t mdic = { .regadd = addr, .opcode = 2 }; + int n_tries = 10; + f64 t; + + t = vlib_time_now (vm); + ige_reg_wr (dev, IGE_REG_MDIC, mdic.as_u32); + vlib_process_suspend (vm, 5e-5); + ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32); + + while (mdic.ready == 0 && n_tries-- > 0) + { + vlib_process_suspend (vm, 2e-5); + ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32); + } + + t = vlib_time_now (vm) - t; + if (t > 1e-4) + log_warn (dev, "phy_read: register read took %.06f sec", t); + + if (mdic.ready == 0) + { + log_err (dev, "phy read timeout"); + return VNET_DEV_ERR_TIMEOUT; + } + + if (addr < ARRAY_LEN (phy_reg_names) && phy_reg_names[addr]) + log_debug (dev, "reg %s data 0x%04x", phy_reg_names[addr], mdic.data); + else + log_debug (dev, "addr 0x%02x data 0x%04x", addr, mdic.data); + + *data = mdic.data; + return 0; +} + +static vnet_dev_rv_t +ige_phy_write (vlib_main_t *vm, vnet_dev_t *dev, u16 addr, u16 data) +{ + ige_reg_mdic_t mdic = { .regadd = addr, .opcode = 1, .data = data }; + int n_tries = 10; + f64 t; + + t = vlib_time_now (vm); + ige_reg_wr (dev, IGE_REG_MDIC, mdic.as_u32); + vlib_process_suspend (vm, 5e-5); + ige_reg_rd (dev, IGE_REG_MDIC, 
&mdic.as_u32); + + while (mdic.ready == 0 && n_tries-- > 0) + { + vlib_process_suspend (vm, 2e-5); + ige_reg_rd (dev, IGE_REG_MDIC, &mdic.as_u32); + } + + t = vlib_time_now (vm) - t; + if (t > 1e-4) + log_warn (dev, "phy_write: register write took %.06f sec", t); + + if (mdic.ready == 0) + { + log_err (dev, "phy write timeout"); + return VNET_DEV_ERR_TIMEOUT; + } + + if (addr < ARRAY_LEN (phy_reg_names) && phy_reg_names[addr]) + log_debug (dev, "reg %s data 0x%04x", phy_reg_names[addr], mdic.data); + else + log_debug (dev, "addr 0x%02x data 0x%04x", addr, mdic.data); + + return 0; +} + +#define foreach_ige_phy_type \ + _ (0x67c9dc00, GPY211, "Foxville LM B.1") \ + _ (0x67c9dc80, GPY211, "Foxville LM B.2") \ + _ (0x67c9dcc0, GPY211, "Foxville LM B.3 / Foxville Dock") \ + _ (0x67c9dc02, GPY211, "Foxville V B.1") \ + _ (0x67c9dc82, GPY211, "Foxville V B.2") \ + _ (0x67c9dcc2, GPY211, "Foxville V B.3") \ + _ (0x67c9dc83, GPY211, "Foxville IT B.2") \ + _ (0x67c9dcc3, GPY211, "Foxville IT B.3") \ + _ (0x67c9dc18, GPY211, "FoxvilleC LM / Dock") \ + _ (0x67c9dc58, GPY211, "FoxvilleC V") \ + _ (0x67c9dcd8, GPY211, "FoxvilleC IT") + +static struct +{ + u32 phy_id; + ige_phy_type_t type; + char *name; +} phy_types[] = { +#define _(i, t, s) \ + { \ + .phy_id = i, \ + .type = IGE_PHY_TYPE_##t, \ + .name = s, \ + }, + foreach_ige_phy_type +#undef _ +}; + +vnet_dev_rv_t +ige_phy_mmd_write (vlib_main_t *vm, vnet_dev_t *dev, u8 dad, u16 addr, + u16 data) +{ + vnet_dev_rv_t rv; + struct + { + u16 reg; + u16 val; + } seq[] = { + { IGE_PHY_REG_MMDCTRL, dad }, + { IGE_PHY_REG_MMDDATA, addr }, + { IGE_PHY_REG_MMDCTRL, 0x4000 | dad }, + { IGE_PHY_REG_MMDDATA, data }, + { IGE_PHY_REG_MMDCTRL, 0 }, + }; + + FOREACH_ARRAY_ELT (e, seq) + { + rv = ige_phy_write (vm, dev, e->reg, e->val); + if (rv != VNET_DEV_OK) + return rv; + } + +#if 0 + ige_phy_rw_t rw2[5] = { { .addr = 0xd, .data = 7, .wr = 1 }, + { .addr = 0xe, .data = 0x20, .wr = 1 }, + { .addr = 0xd, .data = 0x4007, .wr = 1 }, + { 
.addr = 0xe, .data = 0x82, .wr = 1 }, + { .addr = 0xd, .data = 0, .wr = 1 } }; +#endif + return VNET_DEV_OK; +} + +typedef struct +{ + union + { + struct + { + u16 phy_id2; + u16 phy_id1; + }; + u32 phy_id; + struct + { + u32 revision : 4; + u32 model : 6; + u32 oui : 22; + }; + }; +} ige_phy_id_t; + +vnet_dev_rv_t +ige_phy_init (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_device_t *id = vnet_dev_get_data (dev); + vnet_dev_rv_t rv; + u16 reg; + ige_phy_id_t phyid; + + if ((rv = ige_phy_acquire (vm, dev)) != VNET_DEV_OK) + return rv; + + if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_PHYID1, &phyid.phy_id1)) != + VNET_DEV_OK) + goto done; + + if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_PHYID2, &phyid.phy_id2)) != + VNET_DEV_OK) + goto done; + + if (id->config.phy_type == IGE_PHY_TYPE_UNKNOWN) + { + FOREACH_ARRAY_ELT (e, phy_types) + if (e->phy_id == phyid.phy_id) + { + log_debug (dev, "PHY is '%s' (oui 0x%x model 0x%x revision 0x%x)", + e->name, phyid.oui, phyid.model, phyid.revision); + id->config.phy_type = e->type; + break; + } + } + + if (id->config.phy_type == IGE_PHY_TYPE_UNKNOWN) + { + log_err (dev, "Unsupported phy 0x%08x", phyid.phy_id); + rv = VNET_DEV_ERR_UNSUPPORTED_DEVICE; + goto done; + } + + /* enable "1000BASE-T Full-Duplex" in GCTRL */ + if ((rv = ige_phy_read (vm, dev, IGE_PHY_REG_GCTRL, ®)) != VNET_DEV_OK) + goto done; + log_debug (dev, "GCTRL was set to 0x%04x", reg); + reg |= 0x200; + if ((rv = ige_phy_write (vm, dev, IGE_PHY_REG_GCTRL, reg)) != VNET_DEV_OK) + goto done; + + if (id->config.phy_type == IGE_PHY_TYPE_GPY211) + { + /* modify ANEG[7] device register ANEG_MGBT_AN_CTRL[0x20]: + * AB_2G5BT[7] - 2.5 G BASE-T ability + * FR_2G5BT[5] - 2.5 G BASE-T Fast Retrain Ability + * FR[1] - Fast Retrain Ability + */ + rv = ige_phy_mmd_write (vm, dev, 7, 0x20, 0xa2); + if (rv != VNET_DEV_OK) + goto done; + } + +done: + return ige_phy_release (vm, dev); +} diff --git a/src/plugins/dev_ige/port.c b/src/plugins/dev_ige/port.c new file mode 100644 index 
00000000000..c18ca0b5ea4 --- /dev/null +++ b/src/plugins/dev_ige/port.c @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ige_log, static) = { + .class_name = "ige", + .subclass_name = "port", +}; + +const u32 link_speeds[8] = { + [0b000] = 10000, + [0b001] = 100000, + [0b010] = 1000000, + [0b110] = 2500000, +}; + +static void +ige_port_status_poll (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + ige_port_t *ip = vnet_dev_get_port_data (port); + ige_reg_status_t status; + + ige_reg_rd (dev, IGE_REG_STATUS, &status.as_u32); + + if (ip->last_status.as_u32 != status.as_u32) + { + vnet_dev_port_state_changes_t changes = {}; + + log_debug (dev, "\n%U", format_ige_reg_diff, IGE_REG_STATUS, + ip->last_status.as_u32, status.as_u32); + + if (ip->last_status.link_up != status.link_up) + { + changes.change.link_state = 1; + changes.link_state = status.link_up; + log_debug (dev, "link state changed to %s", + status.link_up ? "up" : "down"); + } + + if (ip->last_status.full_duplex != status.full_duplex) + { + changes.change.link_duplex = 1; + changes.full_duplex = status.full_duplex; + log_debug (dev, "duplex changed to %s", + status.full_duplex ? 
"full" : "half"); + } + + if (ip->last_status.speed != status.speed || + ip->last_status.speed_2p5 != status.speed_2p5) + { + changes.change.link_speed = 1; + changes.link_speed = + link_speeds[status.speed_2p5 << 2 | status.speed]; + if (changes.link_speed) + log_debug (dev, "link speed changed to %u Mbps", + changes.link_speed / 1000); + else + log_warn (dev, + "device reported unknown speed (speed %u speed_2p5 %u)", + status.speed, status.speed_2p5); + } + ip->last_status.as_u32 = status.as_u32; + if (changes.change.any) + vnet_dev_port_state_change (vm, port, changes); + } +} + +vnet_dev_rv_t +ige_port_init (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_rv_t rv; + + log_debug (port->dev, "port %u", port->port_id); + + rv = ige_port_counters_init (vm, port); + vnet_dev_poll_port_add (vm, port, 1, ige_port_status_poll); + return rv; +} + +vnet_dev_rv_t +ige_port_start (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + ige_device_t *id = vnet_dev_get_data (port->dev); + ige_rxq_t *iq; + ige_txq_t *tq; + ige_reg_rctl_t rctl; + ige_reg_tctl_t tctl; + vnet_dev_rv_t rv = VNET_DEV_OK; + + log_debug (dev, "port %u", port->port_id); + + ige_reg_rd (dev, IGE_REG_RCTL, &rctl.as_u32); + if (rctl.rx_enable) + { + log_warn (dev, "port %u rx is unexpectedly enabled", port->port_id); + rctl.rx_enable = 0; + ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32); + } + + ige_reg_rd (dev, IGE_REG_TCTL, &tctl.as_u32); + if (tctl.tx_enable) + { + log_warn (dev, "port %u tx is unexpectedly enabled", port->port_id); + tctl.tx_enable = 0; + ige_reg_wr (dev, IGE_REG_TCTL, tctl.as_u32); + } + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + const ige_reg_srrctl_t srrctl = { + .drop_en = 1, + .desc_type = 1, /* advanced, no header */ + .bsizepacket = 2, /* 2k */ + //.bsizeheader = 2, /* 128 B */ + }; + + const ige_reg_rxdctl_t rxdctl = { + .pthresh = 12, + .hthresh = 10, + .wthresh = 1, + .enable = 1, + }; + + u64 dma_addr; + u16 q = rxq->queue_id; + ige_rxq_t 
*iq = vnet_dev_get_rx_queue_data (rxq); + dma_addr = vnet_dev_get_dma_addr (vm, dev, iq->descs); + ige_reg_wr (dev, IGE_REG_RDLEN (q), rxq->size * sizeof (ige_rx_desc_t)); + ige_reg_wr (dev, IGE_REG_RDBAH (q), dma_addr >> 32); + ige_reg_wr (dev, IGE_REG_RDBAL (q), dma_addr); + ige_reg_wr (dev, IGE_REG_SRRCTL (q), srrctl.as_u32); + ige_reg_wr (dev, IGE_REG_RXDCTL (q), rxdctl.as_u32); + iq->head = 0; + iq->tail = 0; + iq->reg_rdt = (u32 *) ((u8 *) id->bar0 + IGE_REG_RDT (q)); + + /* Ensure the queue starts with buffers posted. */ + u16 n_posted = ige_rxq_refill_no_wrap ( + vm, iq->buffer_indices, iq->descs, rxq->size, + vnet_dev_get_rx_queue_buffer_pool_index (rxq), dev->va_dma); + + iq->tail = n_posted; + + if (iq->tail == 0) + { + rv = VNET_DEV_ERR_BUFFER_ALLOC_FAIL; + goto error; + } + + __atomic_store_n (iq->reg_rdt, (u32) iq->tail, __ATOMIC_RELEASE); + } + + foreach_vnet_dev_port_tx_queue (txq, port) + { + u64 dma_addr; + u64 wb_dma; + u16 q = txq->queue_id; + ige_reg_txctl_t txctl; + + ige_txq_t *tq = vnet_dev_get_tx_queue_data (txq); + ASSERT (tq->wb != 0); + dma_addr = vnet_dev_get_dma_addr (vm, dev, tq->descs); + wb_dma = vnet_dev_get_dma_addr (vm, dev, tq->wb); + + ige_reg_wr (dev, IGE_REG_TDLEN (q), txq->size * sizeof (ige_tx_desc_t)); + ige_reg_wr (dev, IGE_REG_TDBAH (q), dma_addr >> 32); + ige_reg_wr (dev, IGE_REG_TDBAL (q), dma_addr); + ige_reg_wr (dev, IGE_REG_TDWBAH (q), wb_dma >> 32); + ige_reg_wr (dev, IGE_REG_TDWBAL (q), + ((u32) wb_dma & ~0x3u) | IGE_TDWBAL_HEAD_WB_ENABLE); + + *tq->wb = 0; + + tq->head = tq->tail = 0; + tq->reg_tdt = (u32 *) ((u8 *) id->bar0 + IGE_REG_TDT (q)); + + ige_reg_wr (dev, IGE_REG_TDH (q), 0); + ige_reg_wr (dev, IGE_REG_TDT (q), 0); + + ige_reg_txdctl_t txdctl = { + .pthresh = 8, + .hthresh = 1, + .wthresh = 1, + .enable = 1, + }; + + ige_reg_wr (dev, IGE_REG_TXDCTL (q), txdctl.as_u32); + ige_reg_rd (dev, IGE_REG_TXCTL (q), &txctl.as_u32); + txctl.tx_desc_wb_relax_order_en = 0; + ige_reg_wr (dev, IGE_REG_TXCTL (q), 
txctl.as_u32); + } + + rctl.rx_enable = 1; + rctl.store_bad_packets = 0; + rctl.strip_eth_crc = 1; + rctl.long_pkt_reception_ena = 1; + rctl.vlan_filter_ena = 0; + rctl.bcast_accept_mode = 1; + rctl.discard_pause_frames = 1; + ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32); + ige_reg_wr (dev, IGE_REG_RLPML, port->max_rx_frame_size); + + tctl.tx_enable = 1; + tctl.pad_short_pkts = 1; + ige_reg_wr (dev, IGE_REG_TCTL, tctl.as_u32); + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + u16 q = rxq->queue_id; + + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + ige_reg_wr (dev, IGE_REG_RDH (q), 0); + ige_reg_wr (dev, IGE_REG_RDT (q), iq->tail); + } + + vnet_dev_poll_port_add (vm, port, 3, ige_port_counter_poll); + return 0; + +error: + foreach_vnet_dev_port_rx_queue (rxq, port) + { + iq = vnet_dev_get_rx_queue_data (rxq); + if (iq->tail) + { + u16 n_buffers = iq->tail - iq->head; + u16 mask = rxq->size - 1; + u16 start = iq->head & mask; + if (n_buffers) + vlib_buffer_free_from_ring_no_next (vm, iq->buffer_indices, start, + rxq->size, n_buffers); + } + iq->head = iq->tail = 0; + } + foreach_vnet_dev_port_tx_queue (txq, port) + { + tq = vnet_dev_get_tx_queue_data (txq); + if (tq->tail != tq->head) + { + u16 mask = txq->size - 1; + u16 start = tq->head & mask; + u16 n_buffers = tq->tail - tq->head; + + if (n_buffers) + vlib_buffer_free_from_ring_no_next (vm, tq->buffer_indices, start, + txq->size, n_buffers); + } + + tq->head = tq->tail = 0; + if (tq->reg_tdt) + { + ige_reg_txdctl_t txdctl = {}; + ige_reg_wr (dev, IGE_REG_TDT (txq->queue_id), 0); + ige_reg_rd (dev, IGE_REG_TXDCTL (txq->queue_id), &txdctl.as_u32); + txdctl.enable = 0; + ige_reg_wr (dev, IGE_REG_TXDCTL (txq->queue_id), txdctl.as_u32); + ige_reg_wr (dev, IGE_REG_TDWBAL (txq->queue_id), 0); + ige_reg_wr (dev, IGE_REG_TDWBAH (txq->queue_id), 0); + } + } + return rv; +} + +void +ige_port_stop (vlib_main_t *vm, vnet_dev_port_t *port) +{ + vnet_dev_t *dev = port->dev; + log_debug (dev, "port %u", port->port_id); + 
ige_reg_rctl_t rctl; + ige_reg_tctl_t tctl; + vnet_dev_poll_port_remove (vm, port, ige_port_counter_poll); + + ige_reg_rd (dev, IGE_REG_RCTL, &rctl.as_u32); + rctl.rx_enable = 0; + ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32); + + ige_reg_rd (dev, IGE_REG_TCTL, &tctl.as_u32); + tctl.tx_enable = 0; + ige_reg_wr (dev, IGE_REG_TCTL, tctl.as_u32); + + foreach_vnet_dev_port_rx_queue (rxq, port) + { + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + u16 n_buffers = iq->tail - iq->head; + u16 mask = rxq->size - 1; + + if (n_buffers) + vlib_buffer_free_from_ring_no_next ( + vm, iq->buffer_indices, iq->head & mask, rxq->size, n_buffers); + + iq->head = iq->tail = 0; + } + + foreach_vnet_dev_port_tx_queue (txq, port) + { + ige_txq_t *tq = vnet_dev_get_tx_queue_data (txq); + u16 n_buffers = tq->tail - tq->head; + u16 mask = txq->size - 1; + + if (n_buffers) + vlib_buffer_free_from_ring_no_next ( + vm, tq->buffer_indices, tq->head & mask, txq->size, n_buffers); + + tq->head = tq->tail = 0; + if (tq->reg_tdt) + { + ige_reg_txdctl_t txdctl = {}; + ige_reg_wr (dev, IGE_REG_TDT (txq->queue_id), 0); + ige_reg_rd (dev, IGE_REG_TXDCTL (txq->queue_id), &txdctl.as_u32); + txdctl.enable = 0; + ige_reg_wr (dev, IGE_REG_TXDCTL (txq->queue_id), txdctl.as_u32); + ige_reg_wr (dev, IGE_REG_TDWBAL (txq->queue_id), 0); + ige_reg_wr (dev, IGE_REG_TDWBAH (txq->queue_id), 0); + } + } +} + +static vnet_dev_rv_t +ige_set_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enabled) +{ + vnet_dev_t *dev = port->dev; + ige_reg_rctl_t rctl; + + ige_reg_rd (dev, IGE_REG_RCTL, &rctl.as_u32); + rctl.uc_promisc_ena = enabled; + rctl.mc_promisc_ena = enabled; + ige_reg_wr (dev, IGE_REG_RCTL, rctl.as_u32); + ige_reg_rd (dev, IGE_REG_RCTL, &rctl.as_u32); + log_debug (dev, "\n %U", format_ige_reg_read, IGE_REG_RCTL, rctl.as_u32); + return VNET_DEV_OK; +} + +static vnet_dev_rv_t +ige_change_primary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port, + const vnet_dev_hw_addr_t *hw_addr) +{ + vnet_dev_t *dev 
= port->dev; + ige_receive_addr_t ra = { + .av = 1, + }; + + clib_memcpy (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)); + + ige_reg_wr (dev, IGE_REG_RAH (0), ra.rah); + ige_reg_wr (dev, IGE_REG_RAL (0), ra.ral); + + log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table, + dev); + return VNET_DEV_OK; +} + +static vnet_dev_rv_t +ige_add_secondary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port, + const vnet_dev_hw_addr_t *hw_addr) +{ + vnet_dev_t *dev = port->dev; + ige_receive_addr_t ra; + vnet_dev_rv_t rv = VNET_DEV_OK; + u32 empty_slot = 0; + + for (u32 i = 0; i < 16; i++) + { + ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah); + ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral); + if (memcmp (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)) == 0) + { + log_err (dev, "address %U already exists in table", + format_ethernet_address, hw_addr->eth_mac); + rv = VNET_DEV_ERR_ALREADY_EXISTS; + goto done; + } + if (ra.av == 0 && empty_slot == 0 && i > 0) + empty_slot = i; + } + + if (empty_slot == 0) + { + log_err (dev, "failed to add secondary hw addr %U, table full", + format_ethernet_address, hw_addr->eth_mac); + rv = VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE; + goto done; + } + + ra = (ige_receive_addr_t){ .av = 1 }; + clib_memcpy (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)); + ige_reg_wr (dev, IGE_REG_RAH (empty_slot), ra.rah); + ige_reg_wr (dev, IGE_REG_RAL (empty_slot), ra.ral); + +done: + log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table, + dev); + return rv; +} + +static vnet_dev_rv_t +ige_remove_secondary_hw_addr (vlib_main_t *vm, vnet_dev_port_t *port, + const vnet_dev_hw_addr_t *hw_addr) +{ + vnet_dev_t *dev = port->dev; + ige_receive_addr_t ra; + vnet_dev_rv_t rv = VNET_DEV_OK; + + for (u32 i = 1; i < 16; i++) + { + ige_reg_rd (dev, IGE_REG_RAH (i), &ra.rah); + ige_reg_rd (dev, IGE_REG_RAL (i), &ra.ral); + if (memcmp (ra.hw_addr, hw_addr->eth_mac, sizeof (ra.hw_addr)) == 0) + { + ige_reg_wr (dev, IGE_REG_RAH (i), 0); + 
ige_reg_wr (dev, IGE_REG_RAL (i), 0); + goto done; + } + } + + log_err (dev, "failed to remove secondary hw addr %U, not found", + format_ethernet_address, hw_addr->eth_mac); + rv = VNET_DEV_ERR_NOT_FOUND; + +done: + log_debug (dev, "receive addr table:\n%U", format_ige_receive_addr_table, + dev); + return rv; +} + +vnet_dev_rv_t +ige_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED; + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + if (port->started) + rv = VNET_DEV_ERR_PORT_STARTED; + break; + + case VNET_DEV_PORT_CFG_PROMISC_MODE: + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + rv = VNET_DEV_OK; + break; + + default: + break; + } + + return rv; +} + +vnet_dev_rv_t +ige_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_port_cfg_change_req_t *req) +{ + vnet_dev_rv_t rv = VNET_DEV_OK; + switch (req->type) + { + case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE: + break; + case VNET_DEV_PORT_CFG_PROMISC_MODE: + rv = ige_set_promisc_mode (vm, port, req->promisc); + break; + case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + rv = ige_change_primary_hw_addr (vm, port, &req->addr); + break; + case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: + rv = ige_add_secondary_hw_addr (vm, port, &req->addr); + break; + case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR: + rv = ige_remove_secondary_hw_addr (vm, port, &req->addr); + break; + + default: + rv = VNET_DEV_ERR_NOT_SUPPORTED; + break; + } + + return rv; +} diff --git a/src/plugins/dev_ige/queue.c b/src/plugins/dev_ige/queue.c new file mode 100644 index 00000000000..895c79dead4 --- /dev/null +++ b/src/plugins/dev_ige/queue.c @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include +#include +#include + 
+VLIB_REGISTER_LOG_CLASS (ige_log, static) = { + .class_name = "ige", + .subclass_name = "queue", +}; + +vnet_dev_rv_t +ige_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + vnet_dev_t *dev = rxq->port->dev; + ige_device_t *id = vnet_dev_get_data (dev); + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + vnet_dev_rv_t rv; + + if (id->avail_rxq_bmp == 0) + { + log_err (dev, "no free RX queues (requested size %u)", rxq->size); + return VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE; + } + + rxq->queue_id = get_lowest_set_bit_index (id->avail_rxq_bmp); + id->avail_rxq_bmp ^= 1 << rxq->queue_id; + + iq->buffer_indices = clib_mem_alloc_aligned ( + rxq->size * sizeof (iq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES); + + if (iq->buffer_indices == 0) + { + id->avail_rxq_bmp |= 1 << rxq->queue_id; + log_err (dev, "queue %u buffer ring alloc failed (ring size %u)", + rxq->queue_id, rxq->size); + return VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE; + } + + clib_memset_u32 (iq->buffer_indices, 0, rxq->size); + + rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ige_rx_desc_t) * rxq->size, 0, + (void **) &iq->descs); + if (rv != VNET_DEV_OK) + { + clib_mem_free (iq->buffer_indices); + iq->buffer_indices = 0; + id->avail_rxq_bmp |= 1 << rxq->queue_id; + log_err (dev, "queue %u DMA descriptor alloc failed (rv %d)", + rxq->queue_id, rv); + return rv; + } + + log_debug (dev, "rx queue %u allocated (size %u)", rxq->queue_id, rxq->size); + return rv; +} + +vnet_dev_rv_t +ige_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + vnet_dev_t *dev = txq->port->dev; + ige_device_t *id = vnet_dev_get_data (dev); + ige_txq_t *iq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_rv_t rv = VNET_DEV_OK; + + if (id->avail_txq_bmp == 0) + { + log_err (dev, "no free TX queues (requested size %u)", txq->size); + return VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE; + } + txq->queue_id = get_lowest_set_bit_index (id->avail_txq_bmp); + id->avail_txq_bmp ^= 1 << txq->queue_id; + iq->buffer_indices = 
clib_mem_alloc_aligned ( + txq->size * sizeof (iq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES); + + if (iq->buffer_indices == 0) + { + rv = VNET_DEV_ERR_RESOURCE_NOT_AVAILABLE; + goto done; + } + + rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ige_tx_desc_t) * txq->size, 0, + (void **) &iq->descs); + + if (rv != VNET_DEV_OK) + goto done; + + rv = vnet_dev_dma_mem_alloc (vm, dev, CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES, (void **) &iq->wb); + + if (rv != VNET_DEV_OK) + goto done; + + log_debug (dev, "tx queue %u allocated (size %u)", txq->queue_id, txq->size); + +done: + if (rv != VNET_DEV_OK) + { + if (iq->wb) + vnet_dev_dma_mem_free (vm, dev, iq->wb); + if (iq->descs) + vnet_dev_dma_mem_free (vm, dev, iq->descs); + if (iq->buffer_indices) + clib_mem_free (iq->buffer_indices); + + id->avail_txq_bmp |= 1 << txq->queue_id; + log_err (dev, "queue %u allocation failed (rv %d)", txq->queue_id, rv); + } + + return rv; +} + +void +ige_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq) +{ + vnet_dev_t *dev = rxq->port->dev; + ige_device_t *id = vnet_dev_get_data (dev); + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + + id->avail_rxq_bmp |= 1 << rxq->queue_id; + vnet_dev_dma_mem_free (vm, dev, iq->descs); + iq->descs = 0; + + if (iq->buffer_indices) + { + clib_mem_free (iq->buffer_indices); + iq->buffer_indices = 0; + } +} + +void +ige_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + vnet_dev_t *dev = txq->port->dev; + ige_device_t *id = vnet_dev_get_data (dev); + ige_txq_t *iq = vnet_dev_get_tx_queue_data (txq); + + id->avail_txq_bmp |= 1 << txq->queue_id; + + if (iq->descs) + vnet_dev_dma_mem_free (vm, dev, iq->descs); + + if (iq->buffer_indices) + clib_mem_free (iq->buffer_indices); + + if (iq->wb) + vnet_dev_dma_mem_free (vm, dev, iq->wb); +} diff --git a/src/plugins/dev_ige/reg.c b/src/plugins/dev_ige/reg.c new file mode 100644 index 00000000000..8f0f1619d0e --- /dev/null +++ b/src/plugins/dev_ige/reg.c @@ -0,0 +1,102 @@ +/* 
SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include +#include +#include +#include +#include + +VLIB_REGISTER_LOG_CLASS (ige_log, static) = { + .class_name = "ige", + .subclass_name = "reg", +}; + +vnet_dev_rv_t +ige_reg_poll (vlib_main_t *vm, vnet_dev_t *dev, u32 reg, u32 mask, u32 match, + f64 initial_delay, f64 timeout) +{ + f64 t0 = vlib_time_now (vm); + u32 val; + + for (f64 delay = initial_delay, total_time = delay; total_time < timeout; + delay *= 2, total_time += delay) + { + ige_reg_rd (dev, reg, &val); + if ((val & mask) == match) + { + log_debug (dev, "reg %05x (suspend %.6f)", reg, + vlib_time_now (vm) - t0); + return 1; + } + vlib_process_suspend (vm, delay); + } + log_debug (dev, "reg %05x timeout", reg); + return 0; +} + +void +ige_reg_sw_fw_sync_release (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_reg_swsm_t swsm; + log_debug (dev, ""); + ige_reg_rd (dev, IGE_REG_SWSM, &swsm.as_u32); + swsm.smbi = 0; + swsm.swesmbi = 0; + ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32); +} + +int +ige_reg_sw_fw_sync_acquire (vlib_main_t *vm, vnet_dev_t *dev) +{ + ige_reg_swsm_t swsm; + int i, timeout = 10; + + log_debug (dev, ""); + for (i = 0; i < timeout * 2; i++) + { + ige_reg_rd (dev, IGE_REG_SWSM, &swsm.as_u32); + if (swsm.smbi == 0) + break; + + if (i == timeout - 1) + { + log_debug (dev, "timeout, attempt to clear SWSM"); + swsm.smbi = 0; + swsm.swesmbi = 0; + ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32); + } + vlib_process_suspend (vm, 5e-5); + } + + if (i == timeout * 2) + { + log_debug (dev, "timeout acquiring SWSM"); + return 0; + } + + for (i = 0; i < timeout; i++) + { + swsm.swesmbi = 1; + ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32); + ige_reg_rd (dev, IGE_REG_SWSM, &swsm.as_u32); + if (swsm.swesmbi == 1) + break; + vlib_process_suspend (vm, 5e-5); + } + + if (i == timeout) + { + swsm.smbi = 0; + swsm.swesmbi = 0; + ige_reg_wr (dev, IGE_REG_SWSM, swsm.as_u32); + log_debug (dev, "timeout acquiring 
SWSMBI"); + return 0; + } + + log_debug (dev, "acquired"); + return 1; +} diff --git a/src/plugins/dev_ige/rx_node.c b/src/plugins/dev_ige/rx_node.c new file mode 100644 index 00000000000..6c8680bee23 --- /dev/null +++ b/src/plugins/dev_ige/rx_node.c @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include "vppinfra/clib.h" +#include +#include +#include +#include +#include + +static_always_inline void +ige_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, int use_va_dma) +{ + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + u16 n, off, n_before_wrap, size, mask, n_refill, tail; + u8 buffer_pool_index = vnet_dev_get_rx_queue_buffer_pool_index (rxq); + + tail = iq->tail; + size = rxq->size; + + n_refill = iq->head + size - tail; + + if (n_refill < 8) + return; + + mask = size - 1; + off = tail & mask; + n_before_wrap = size - off; + n = clib_min (n_refill, n_before_wrap); + + n = ige_rxq_refill_no_wrap (vm, iq->buffer_indices + off, iq->descs + off, n, + buffer_pool_index, use_va_dma); + tail += n; + + if (n == n_before_wrap) + tail += ige_rxq_refill_no_wrap (vm, iq->buffer_indices, iq->descs, + n_refill - n_before_wrap, + buffer_pool_index, use_va_dma); + + if (iq->tail != tail) + { + __atomic_store_n (iq->reg_rdt, tail & mask, __ATOMIC_RELEASE); + iq->tail = tail; + } +} + +static_always_inline u64 +ige_rx_deq_64_desc (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq, vlib_buffer_template_t bt, + u32 *to, u32 max_pkts, u32 *n_rx_bytes, u32 *n_trace) +{ + u16 mask = rxq->size - 1; + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + u16 head = iq->head; + u16 slot = head & mask; + ige_rx_desc_t dc[64], *descs = iq->descs, *d = descs + slot; + u32 bc[64], *buffer_indices = iq->buffer_indices, + *bi = buffer_indices + slot; + vlib_buffer_t *buffers[64]; + u32 n_descs = 0, n_pkts = 0; + + while (d->dd && n_descs < ARRAY_LEN (dc) && n_pkts < max_pkts) + { + dc[n_descs] = *d; + 
bc[n_descs] = *bi; + n_pkts += d->eop; + n_descs++; + slot = (slot + 1) & mask; + d = descs + slot; + bi = buffer_indices + slot; + } + + if (n_pkts == 0) + return 0; + + /* remove descriptors from incomplete packets */ + while (dc[n_descs - 1].eop == 0) + n_descs--; + + /* advance head */ + iq->head += n_descs; + + vlib_get_buffers (vm, bc, buffers, n_descs); + + for (int i = 0; i < n_descs; i++) + { + u32 len = dc[i].pkt_len; + buffers[i]->template = bt; + buffers[i]->current_length = len; + *n_rx_bytes += len; + } + + if (n_pkts < n_descs) + { + u32 hi = 0; /* head index */ + u32 tlnif = 0; /* total length not including first buffer */ + + for (int i = 0; i < n_descs; i++) + { + if (i > hi) + { + buffers[i - 1]->next_buffer = bc[i]; + buffers[i - 1]->flags |= VLIB_BUFFER_NEXT_PRESENT; + tlnif += dc[i].pkt_len; + } + if (dc[i].eop) + { + to++[0] = bc[hi]; + if (tlnif) + { + buffers[hi]->total_length_not_including_first_buffer = tlnif; + buffers[hi]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + tlnif = 0; + } + hi = i + 1; + } + } + } + else + vlib_buffer_copy_indices (to, bc, n_pkts); + + if (PREDICT_FALSE (*n_trace)) + for (u32 i = 0; i 0; i++) + { + vlib_buffer_t *b = buffers[i]; + u32 next_index, hw_if_index; + + if (b == 0) + continue; + + next_index = vnet_dev_get_rx_queue_if_next_index (rxq); + hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); + + if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b, 0))) + { + ige_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr)); + tr->next_index = next_index; + tr->hw_if_index = hw_if_index; + tr->queue_id = rxq->queue_id; + tr->buffer_index = bc[i]; + tr->desc = dc[i]; + (*n_trace)--; + } + } + + return n_pkts; +} + +static_always_inline u32 +ige_rx_one_queue (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_rx_queue_t *rxq) +{ + ige_rxq_t *iq = vnet_dev_get_rx_queue_data (rxq); + u16 next_index = vnet_dev_get_rx_queue_if_next_index (rxq); + vlib_buffer_template_t bt; + vnet_main_t *vnm; + 
u32 n_trace, sw_if_index, n_rx = 0, n_rx_bytes = 0, *to_next, n; + uword n_left_to_next; + u16 mask = rxq->size - 1; + u16 slot = iq->head & mask; + ige_rx_desc_t *d = iq->descs + slot; + + if (d->dd == 0) + return 0; + + while (d->eop == 0) + { + slot = (slot + 1) & mask; + d = iq->descs + slot; + if (d->dd == 0) + return 0; + } + + bt = vnet_dev_get_rx_queue_if_buffer_template (rxq); + + vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next); + n_trace = vlib_get_trace_count (vm, node); + + while (n_left_to_next >= 64) + { + n = ige_rx_deq_64_desc (vm, node, rxq, bt, to_next, 64, &n_rx_bytes, + &n_trace); + + n_rx += n; + + to_next += n; + n_left_to_next -= n; + if (n < (64 - 3)) + goto rxq_empty; + } + + if (n_left_to_next > 0) + { + n = ige_rx_deq_64_desc (vm, node, rxq, bt, to_next, n_left_to_next, + &n_rx_bytes, &n_trace); + + n_rx += n; + + to_next += n; + n_left_to_next -= n; + } + +rxq_empty: + + vlib_set_trace_count (vm, node, n_trace); + sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq); + + if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT)) + { + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + nf = vlib_node_runtime_get_next_frame (vm, node, next_index); + f = vlib_get_frame (vm, nf->frame); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = sw_if_index; + ef->hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq); + + // if ((or_qw1 & mask_ipe.as_u64) == 0) f->flags |= + // ETH_INPUT_FRAME_F_IP4_CKSUM_OK; + vlib_frame_no_append (f); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + vnm = vnet_get_main (); + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + vm->thread_index, sw_if_index, n_rx, n_rx_bytes); + + return n_rx; +} + +VNET_DEV_NODE_FN (ige_rx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + uint32_t rv = 0; + 
foreach_vnet_dev_rx_queue_runtime (rxq, node) + { + vnet_dev_t *dev = rxq->port->dev; + + rv += ige_rx_one_queue (vm, node, rxq); + + /* refill RX queue */ + if (dev->va_dma) + ige_rxq_refill (vm, rxq, /*use_va_dma */ 1); + else + ige_rxq_refill (vm, rxq, /*use_va_dma */ 0); + } + + return rv; +} diff --git a/src/plugins/dev_ige/tx_node.c b/src/plugins/dev_ige/tx_node.c new file mode 100644 index 00000000000..e85250d814d --- /dev/null +++ b/src/plugins/dev_ige/tx_node.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Damjan Marion + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +static_always_inline void +ige_enq_txd (vlib_main_t *vm, vlib_node_runtime_t *n, vnet_dev_tx_queue_t *txq, + vlib_buffer_t *b, u32 bi, int first, int last, + ige_tx_desc_t *descs, u32 *buffer_indices, u16 *tail, u16 mask, + int use_va, int trace) +{ + u32 len = b->current_length; + u32 slot = *tail & mask; + ige_tx_desc_t d = { + .eop = last ? 1 : 0, + .rs = last ? 1 : 0, + .ifcs = 1, + .dtyp = 0b0011, + .dtalen = len, + }; + d.addr = use_va ? vlib_buffer_get_current_va (b) : + vlib_buffer_get_current_pa (vm, b); + if (first) + d.paylen = last ? 
len : len + b->total_length_not_including_first_buffer; + + if (trace && b->flags & VLIB_BUFFER_IS_TRACED) + { + ige_tx_trace_t *t = vlib_add_trace (vm, n, b, sizeof (*t)); + t->desc = d; + t->hw_if_index = vnet_dev_get_tx_queue_if_hw_if_index (txq); + t->queue_id = txq->queue_id; + t->buffer_index = bi; + } + + descs[slot] = d; + buffer_indices[slot] = bi; + (*tail)++; +} + +static_always_inline void +ige_txq_complete (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) +{ + ige_txq_t *itq = vnet_dev_get_tx_queue_data (txq); + + u16 head = itq->head; + u16 tail = itq->tail; + u16 n_free; + + if (head == tail) + return; + + u32 new_head = __atomic_load_n (itq->wb, __ATOMIC_ACQUIRE); + u16 mask = txq->size - 1; + n_free = (new_head - head) & mask; + n_free &= 0xfff0; + + if (!n_free) + return; + + vlib_buffer_free_from_ring_no_next (vm, itq->buffer_indices, head & mask, + txq->size, n_free); + + itq->head = head + n_free; +} + +static_always_inline u32 +ige_txq_enq (vlib_main_t *vm, vlib_node_runtime_t *node, + vnet_dev_tx_queue_t *txq, u32 *from, u32 max_pkts, int va, int tr) +{ + ige_txq_t *const itq = vnet_dev_get_tx_queue_data (txq); + ige_tx_desc_t *const d = itq->descs; + u32 *const bi = itq->buffer_indices; + const u16 size = txq->size; + const u16 mask = size - 1; + u16 n_pkts = 0; + u32 drop_too_long[VLIB_FRAME_SIZE], n_drop_too_long = 0; + + ige_txq_complete (vm, txq); + + u16 head = itq->head; + u16 tail = itq->tail; + const u32 max_tail = head + size; + + while (n_pkts < max_pkts && tail < max_tail) + { + u32 hbi = from[n_pkts]; + vlib_buffer_t *b = vlib_get_buffer (vm, hbi); + u32 i; + + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + u32 tbi[4] = { + [0] = b->next_buffer, + }; + vlib_buffer_t *tb[4] = { + [0] = vlib_get_buffer (vm, b->next_buffer), + }; + u32 n = 1; + + while (tb[n - 1]->flags & VLIB_BUFFER_NEXT_PRESENT) + { + if (n >= ARRAY_LEN (tbi)) + { + drop_too_long[n_drop_too_long++] = hbi; + goto next; + } + + tbi[n] = tb[n - 1]->next_buffer; + tb[n] = 
vlib_get_buffer (vm, tbi[n]); + n++; + } + + if (tail + n + 1 > max_tail) + break; + + ige_enq_txd (vm, node, txq, b, hbi, 1, 0, d, bi, &tail, mask, va, + tr); + for (i = 0; i + 1 < n; i++) + ige_enq_txd (vm, node, txq, tb[i], tbi[i], 0, 0, d, bi, &tail, + mask, va, tr); + ige_enq_txd (vm, node, txq, tb[i], tbi[i], 0, 1, d, bi, &tail, mask, + va, tr); + } + else + ige_enq_txd (vm, node, txq, b, hbi, 1, 1, d, bi, &tail, mask, va, tr); + + next: + n_pkts++; + } + + if (n_drop_too_long) + { + vlib_error_count (vm, node->node_index, + IGE_TX_NODE_CTR_BUFFER_CHAIN_TOO_LONG, + n_drop_too_long); + vlib_buffer_free (vm, drop_too_long, n_drop_too_long); + } + + if (itq->tail != tail) + { + __atomic_store_n (itq->reg_tdt, tail & mask, __ATOMIC_RELEASE); + itq->tail = tail; + } + + return n_pkts; +} + +VNET_DEV_NODE_FN (ige_tx_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); + vnet_dev_tx_queue_t *txq = rt->tx_queue; + vnet_dev_t *dev = txq->port->dev; + u32 *from = vlib_frame_vector_args (frame); + u16 n, n_left; + int n_reties = 2; + + n_left = frame->n_vectors; + + vnet_dev_tx_queue_lock_if_needed (txq); + + while (n_reties--) + { + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + n = ige_txq_enq (vm, node, txq, from, n_left, dev->va_dma != 0, 1); + else if (dev->va_dma) + n = ige_txq_enq (vm, node, txq, from, n_left, 1, 0); + else + n = ige_txq_enq (vm, node, txq, from, n_left, 0, 0); + + from += n; + n_left -= n; + + if (n == 0 || n == n_left) + break; + } + + if (n_left) + { + fformat (stderr, "no_free_slots %u\n", n_left); + vlib_buffer_free (vm, from, n_left); + vlib_error_count (vm, node->node_index, IGE_TX_NODE_CTR_NO_FREE_SLOTS, + n_left); + } + + vnet_dev_tx_queue_unlock_if_needed (txq); + + return frame->n_vectors - n_left; +}