/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This driver is not intended for production use and it is unsupported.
 * It is provided for educational use only.
 * Please use the supported DPDK driver instead.
 */
#include <vppinfra/vector.h>

#ifndef CLIB_HAVE_VEC128
#warning HACK: ixge driver won't really work, missing u32x4
typedef unsigned long long u32x4;
#endif
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/vnet.h>
#include <vnet/devices/nic/ixge.h>
#include <vnet/ethernet/ethernet.h>

#define IXGE_ALWAYS_POLL 0

#define EVENT_SET_FLAGS 0
#define IXGE_HWBP_RACE_ELOG 0

#define PCI_VENDOR_ID_INTEL 0x8086
/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */
#define XGE_PHY_DEV_TYPE_PMA_PMD 1
#define XGE_PHY_DEV_TYPE_PHY_XS 4
#define XGE_PHY_ID1 0x2
#define XGE_PHY_ID2 0x3
#define XGE_PHY_CONTROL 0x0
#define XGE_PHY_CONTROL_RESET (1 << 15)
ixge_main_t ixge_main;
static vlib_node_registration_t ixge_input_node;
static vlib_node_registration_t ixge_process_node;
static void ixge_semaphore_get (ixge_device_t * xd)
{
  ixge_main_t * xm = &ixge_main;
  vlib_main_t * vm = xm->vlib_main;
  ixge_regs_t * r = xd->regs;

  /* Wait for bit 0 of the software semaphore register. */
  while (! (r->software_semaphore & (1 << 0)))
    vlib_process_suspend (vm, 100e-6);

  /* Take the software semaphore (bit 1). */
  do {
    r->software_semaphore |= 1 << 1;
  } while (! (r->software_semaphore & (1 << 1)));
}
static void ixge_semaphore_release (ixge_device_t * xd)
{
  ixge_regs_t * r = xd->regs;
  r->software_semaphore &= ~3;
}
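/* Note (editorial sketch; bit names per the 82599 datasheet's SWSM.SMBI and
   SWSM.SWESMBI): bit 0 is the hardware-arbitrated semaphore and bit 1 is the
   software/firmware semaphore taken above.  A typical critical section
   brackets accesses to state shared with firmware:

     ixge_semaphore_get (xd);
     ... touch software/firmware shared registers ...
     ixge_semaphore_release (xd);   // clears bits 0 and 1 (mask ~3)
*/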
static void ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask)
{
  ixge_main_t * xm = &ixge_main;
  vlib_main_t * vm = xm->vlib_main;
  ixge_regs_t * r = xd->regs;
  u32 fw_mask = sw_mask << 5;
  u32 m, done = 0;

  while (! done)
    {
      ixge_semaphore_get (xd);
      m = r->software_firmware_sync;
      done = (m & fw_mask) == 0;
      if (done)
        r->software_firmware_sync = m | sw_mask;
      ixge_semaphore_release (xd);
      if (! done)
        vlib_process_suspend (vm, 10e-3);
    }
}
static void ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask)
{
  ixge_regs_t * r = xd->regs;
  ixge_semaphore_get (xd);
  r->software_firmware_sync &= ~sw_mask;
  ixge_semaphore_release (xd);
}
u32 ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v, u32 is_read)
{
  ixge_regs_t * r = xd->regs;
  const u32 busy_bit = 1 << 30;
  u32 x;

  ASSERT (xd->phy_index < 2);
  ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index));

  ASSERT (reg_index < (1 << 16));
  ASSERT (dev_type < (1 << 5));

  if (! is_read)
    r->xge_mac.phy_data = v;

  /* Address cycle. */
  x = reg_index | (dev_type << 16) | (xd->phys[xd->phy_index].mdio_address << 21);
  r->xge_mac.phy_command = x | busy_bit;
  /* Busy wait timed to take 28e-6 secs.  No suspend. */
  while (r->xge_mac.phy_command & busy_bit)
    ;

  r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit;
  while (r->xge_mac.phy_command & busy_bit)
    ;

  if (is_read)
    v = r->xge_mac.phy_data >> 16;

  ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index));
  return v;
}
static u32 ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index)
{ return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ 1); }

static void ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v)
{ (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ 0); }
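/* Usage sketch: the PHY probe later in this file combines the two IEEE 802.3
   clause 45 id registers into a single 32-bit id over MDIO:

     u32 id = (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16)
              | ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2);
*/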
static void ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda)
{
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, b->private_data);
  u32 v = 0;

  v |= (sda != 0) << 3;
  v |= (scl != 0) << 1;
  xd->regs->i2c_control = v;
}
static void ixge_i2c_get_bits (i2c_bus_t * b, int * scl, int * sda)
{
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, b->private_data);
  u32 v;

  v = xd->regs->i2c_control;
  *sda = (v & (1 << 2)) != 0;
  *scl = (v & (1 << 0)) != 0;
}
static u16 ixge_read_eeprom (ixge_device_t * xd, u32 address)
{
  ixge_regs_t * r = xd->regs;
  u32 v;

  r->eeprom_read = ((/* start bit */ (1 << 0)) | (address << 2));
  /* Wait for done bit. */
  while (! ((v = r->eeprom_read) & (1 << 1)))
    ;
  return v >> 16;
}
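/* Worked example: the read register packs a start bit in bit 0 and the word
   address from bit 2 up, so reading word 0x2b writes (0x2b << 2) | 1 == 0xad
   to eeprom_read; once the done bit (bit 1) sets, the 16-bit word is
   returned in the upper half of the register. */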
static void
ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable)
{
  u32 tx_disable_bit = 1 << 3;
  if (enable)
    xd->regs->sdp_control &= ~tx_disable_bit;
  else
    xd->regs->sdp_control |= tx_disable_bit;
}
static void
ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable)
{
  u32 is_10g_bit = 1 << 5;
  if (enable)
    xd->regs->sdp_control |= is_10g_bit;
  else
    xd->regs->sdp_control &= ~is_10g_bit;
}
static clib_error_t *
ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type)
{
  u16 a, id, reg_values_addr = 0;

  a = ixge_read_eeprom (xd, 0x2b);
  if (a == 0 || a == 0xffff)
    return clib_error_create ("no init sequence in eeprom");

  while (1)
    {
      id = ixge_read_eeprom (xd, ++a);
      if (id == 0xffff)
        break;
      reg_values_addr = ixge_read_eeprom (xd, ++a);
      if (id == sfp_type)
        break;
    }
  if (id != sfp_type)
    return clib_error_create ("failed to find id 0x%x", sfp_type);

  ixge_software_firmware_sync (xd, 1 << 3);
  while (1)
    {
      u16 v = ixge_read_eeprom (xd, ++reg_values_addr);
      if (v == 0xffff)
        break;
      xd->regs->core_analog_config = v;
    }
  ixge_software_firmware_sync_release (xd, 1 << 3);

  /* Make sure laser is off.  We'll turn on the laser when
     the interface is brought up. */
  ixge_sfp_enable_disable_laser (xd, /* enable */ 0);
  ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1);
  return 0;
}
static void
ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up)
{
  u32 v;

  if (is_up)
    {
      /* pma/pmd 10g serial SFI. */
      xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16);
      xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16;

      v = xd->regs->xge_mac.auto_negotiation_control;
      v &= ~(7 << 13);
      v |= 1 << 12;             /* Restart autoneg. */
      xd->regs->xge_mac.auto_negotiation_control = v;

      while (! (xd->regs->xge_mac.link_partner_ability[0] & 0xf0000))
        ;

      v = xd->regs->xge_mac.auto_negotiation_control;
      /* link mode 10g sfi serdes */
      v &= ~(7 << 13);
      v |= 3 << 13;
      v |= 1 << 12;             /* Restart autoneg. */
      xd->regs->xge_mac.auto_negotiation_control = v;

      (void) xd->regs->xge_mac.link_status;     /* flush */
    }

  ixge_sfp_enable_disable_laser (xd, /* enable */ is_up);

  /* Give time for link partner to notice that we're up. */
  if (is_up && vlib_in_process_context (vlib_get_main ()))
    vlib_process_suspend (vlib_get_main (), 300e-3);
}
always_inline ixge_dma_regs_t *
get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi)
{
  ixge_regs_t * r = xd->regs;
  ASSERT (qi < 128);
  if (rt == VLIB_RX)
    return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64];
  else
    return &r->tx_dma[qi];
}
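/* Example: per the register layout above, the 128 rx queues live in two
   banks of 64, so get_dma_regs (xd, VLIB_RX, 70) resolves to &r->rx_dma1[6]
   while queue 5 resolves to &r->rx_dma0[5]; tx queues use a single bank. */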
static clib_error_t *
ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  vnet_hw_interface_t * hif = vnet_get_hw_interface (vnm, hw_if_index);
  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, hif->dev_instance);
  ixge_dma_regs_t * dr = get_dma_regs (xd, VLIB_RX, 0);

  if (is_up)
    {
      xd->regs->rx_enable |= 1;
      xd->regs->tx_dma_control |= 1;
      dr->control |= 1 << 25;
      while (! (dr->control & (1 << 25)))
        ;
    }
  else
    {
      xd->regs->rx_enable &= ~1;
      xd->regs->tx_dma_control &= ~1;
    }

  ixge_sfp_device_up_down (xd, is_up);

  return /* no error */ 0;
}
static void ixge_sfp_phy_init (ixge_device_t * xd)
{
  ixge_phy_t * phy = xd->phys + xd->phy_index;
  i2c_bus_t * ib = &xd->i2c_bus;

  ib->private_data = xd->device_index;
  ib->put_bits = ixge_i2c_put_bits;
  ib->get_bits = ixge_i2c_get_bits;
  vlib_i2c_init (ib);

  vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) &xd->sfp_eeprom);

  if (vlib_i2c_bus_timed_out (ib) || ! sfp_eeprom_is_valid (&xd->sfp_eeprom))
    xd->sfp_eeprom.id = SFP_ID_unknown;
  else
    {
      /* FIXME 5 => SR/LR eeprom ID. */
      clib_error_t * e = ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function);
      if (e)
        clib_error_report (e);
    }

  phy->mdio_address = ~0;
}
static void ixge_phy_init (ixge_device_t * xd)
{
  ixge_main_t * xm = &ixge_main;
  vlib_main_t * vm = xm->vlib_main;
  ixge_phy_t * phy = xd->phys + xd->phy_index;

  switch (xd->device_id)
    {
    case IXGE_82599_sfp:
    case IXGE_82599_sfp_em:
    case IXGE_82599_sfp_fcoe:
      /* others? */
      return ixge_sfp_phy_init (xd);
    default:
      break;
    }

  /* Probe address of phy. */
  {
    u32 i, v;

    phy->mdio_address = ~0;
    for (i = 0; i < 32; i++)
      {
        phy->mdio_address = i;
        v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1);
        if (v != 0xffff && v != 0)
          break;
      }

    /* No PHY found? */
    if (i >= 32)
      return;
  }

  phy->id = ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16)
             | ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2));

  {
    ELOG_TYPE_DECLARE (e) = {
      .function = (char *) __FUNCTION__,
      .format = "ixge %d, phy id 0x%x mdio address %d",
      .format_args = "i4i4i4",
    };
    struct { u32 instance, id, address; } * ed;
    ed = ELOG_DATA (&vm->elog_main, e);
    ed->instance = xd->device_index;
    ed->id = phy->id;
    ed->address = phy->mdio_address;
  }

  /* Reset phy. */
  ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, XGE_PHY_CONTROL_RESET);

  /* Wait for self-clearing reset bit to clear. */
  do {
    vlib_process_suspend (vm, 1e-3);
  } while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & XGE_PHY_CONTROL_RESET);
}
static u8 * format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va)
{
  ixge_rx_from_hw_descriptor_t * d = va_arg (*va, ixge_rx_from_hw_descriptor_t *);
  u32 s0 = d->status[0], s2 = d->status[2];
  u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp;
  uword indent = format_get_indent (s);

  s = format (s, "%s-owned",
              (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" : "hw");
  s = format (s, ", length this descriptor %d, l3 offset %d",
              d->n_packet_bytes_this_descriptor,
              IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0));
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET)
    s = format (s, ", end-of-packet");

  s = format (s, "\n%U", format_white_space, indent);

  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR)
    s = format (s, "layer2 error");

  if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2)
    {
      s = format (s, "layer 2 type %d", (s0 & 0x1f));
      return s;
    }

  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN)
    s = format (s, "vlan header 0x%x\n%U", d->vlan_tag,
                format_white_space, indent);

  if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4)))
    {
      s = format (s, "ip4%s",
                  (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : "");
      if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED)
        s = format (s, " checksum %s",
                    (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? "bad" : "ok");
    }
  if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6)))
    s = format (s, "ip6%s",
                (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : "");

  is_tcp = is_udp = 0;
  if ((is_ip = (is_ip4 | is_ip6)))
    {
      is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0;
      is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0;
      if (is_tcp)
        s = format (s, ", tcp");
      if (is_udp)
        s = format (s, ", udp");
    }

  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED)
    s = format (s, ", tcp checksum %s",
                (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : "ok");
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)
    s = format (s, ", udp checksum %s",
                (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : "ok");

  return s;
}
static u8 * format_ixge_tx_descriptor (u8 * s, va_list * va)
{
  ixge_tx_descriptor_t * d = va_arg (*va, ixge_tx_descriptor_t *);
  u32 s0 = d->status0, s1 = d->status1;
  uword indent = format_get_indent (s);
  u32 v;

  s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer",
              d->buffer_address,
              s1 >> 14, d->n_bytes_this_buffer);

  s = format (s, "\n%U", format_white_space, indent);

  if ((v = (s0 >> 0) & 3))
    s = format (s, "reserved 0x%x, ", v);

  if ((v = (s0 >> 2) & 3))
    s = format (s, "mac 0x%x, ", v);

  if ((v = (s0 >> 4) & 0xf) != 3)
    s = format (s, "type 0x%x, ", v);

  s = format (s, "%s%s%s%s%s%s%s%s",
              (s0 & (1 << 8)) ? "eop, " : "",
              (s0 & (1 << 9)) ? "insert-fcs, " : "",
              (s0 & (1 << 10)) ? "reserved26, " : "",
              (s0 & (1 << 11)) ? "report-status, " : "",
              (s0 & (1 << 12)) ? "reserved28, " : "",
              (s0 & (1 << 13)) ? "is-advanced, " : "",
              (s0 & (1 << 14)) ? "vlan-enable, " : "",
              (s0 & (1 << 15)) ? "tx-segmentation, " : "");

  if ((v = s1 & 0xf) != 0)
    s = format (s, "status 0x%x, ", v);

  if ((v = (s1 >> 4) & 0xf))
    s = format (s, "context 0x%x, ", v);

  if ((v = (s1 >> 8) & 0x3f))
    s = format (s, "options 0x%x, ", v);

  return s;
}
typedef struct {
  ixge_descriptor_t before, after;
  u32 buffer_index;
  u16 device_index;
  u8 queue_index;
  u8 is_start_of_packet;
  /* Copy of VLIB buffer; packet data stored in pre_data. */
  vlib_buffer_t buffer;
} ixge_rx_dma_trace_t;
static u8 * format_ixge_rx_dma_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  vlib_node_t * node = va_arg (*va, vlib_node_t *);
  vnet_main_t * vnm = vnet_get_main();
  ixge_rx_dma_trace_t * t = va_arg (*va, ixge_rx_dma_trace_t *);
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, t->device_index);
  format_function_t * f;
  uword indent = format_get_indent (s);

  {
    vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
    s = format (s, "%U rx queue %d",
                format_vnet_sw_interface_name, vnm, sw,
                t->queue_index);
  }

  s = format (s, "\n%Ubefore: %U",
              format_white_space, indent,
              format_ixge_rx_from_hw_descriptor, &t->before);
  s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx",
              format_white_space, indent,
              t->after.rx_to_hw.head_address,
              t->after.rx_to_hw.tail_address);

  s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
              t->buffer_index,
              format_vlib_buffer, &t->buffer);

  s = format (s, "\n%U",
              format_white_space, indent);

  f = node->format_buffer;
  if (! f || ! t->is_start_of_packet)
    f = format_hex_bytes;
  s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));

  return s;
}
#define foreach_ixge_error                                \
  _ (none, "no error")                                    \
  _ (tx_full_drops, "tx ring full drops")                 \
  _ (ip4_checksum_error, "ip4 checksum errors")           \
  _ (rx_alloc_fail, "rx buf alloc from free list failed") \
  _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem")

typedef enum {
#define _(f,s) IXGE_ERROR_##f,
  foreach_ixge_error
#undef _
  IXGE_N_ERROR,
} ixge_error_t;
always_inline void
ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd,
                                       u32 s00, u32 s02,
                                       u8 * next0, u8 * error0, u32 * flags0)
{
  u8 is0_ip4, is0_ip6, n0, e0;
  u32 f0;

  e0 = IXGE_ERROR_none;
  n0 = IXGE_RX_NEXT_ETHERNET_INPUT;

  is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
  n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;

  e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
        ? IXGE_ERROR_ip4_checksum_error
        : e0);

  is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
  n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;

  n0 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n0;

  /* Check for error. */
  n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;

  f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
                | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
        ? IP_BUFFER_L4_CHECKSUM_COMPUTED
        : 0);
  f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
                 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
         ? 0
         : IP_BUFFER_L4_CHECKSUM_CORRECT);

  *error0 = e0;
  *next0 = n0;
  *flags0 = f0;
}
always_inline void
ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd,
                                       u32 s00, u32 s02,
                                       u32 s10, u32 s12,
                                       u8 * next0, u8 * error0, u32 * flags0,
                                       u8 * next1, u8 * error1, u32 * flags1)
{
  u8 is0_ip4, is0_ip6, n0, e0;
  u8 is1_ip4, is1_ip6, n1, e1;
  u32 f0, f1;

  e0 = e1 = IXGE_ERROR_none;
  /* Default to ethernet-input, matching the x1 path. */
  n0 = n1 = IXGE_RX_NEXT_ETHERNET_INPUT;

  is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
  is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;

  n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;
  n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1;

  e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
        ? IXGE_ERROR_ip4_checksum_error
        : e0);
  e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
        ? IXGE_ERROR_ip4_checksum_error
        : e1);

  is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
  is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;

  n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;
  n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1;

  n0 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n0;
  n1 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n1;

  /* Check for error. */
  n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;
  n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1;

  *error0 = e0;
  *error1 = e1;
  *next0 = n0;
  *next1 = n1;

  f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
                | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
        ? IP_BUFFER_L4_CHECKSUM_COMPUTED
        : 0);
  f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
                | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
        ? IP_BUFFER_L4_CHECKSUM_COMPUTED
        : 0);

  f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
                 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
         ? 0
         : IP_BUFFER_L4_CHECKSUM_CORRECT);
  f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
                 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
         ? 0
         : IP_BUFFER_L4_CHECKSUM_CORRECT);

  *flags0 = f0;
  *flags1 = f1;
}
static void
ixge_rx_trace (ixge_main_t * xm,
               ixge_device_t * xd,
               ixge_dma_queue_t * dq,
               ixge_descriptor_t * before_descriptors,
               u32 * before_buffers,
               ixge_descriptor_t * after_descriptors,
               uword n_descriptors)
{
  vlib_main_t * vm = xm->vlib_main;
  vlib_node_runtime_t * node = dq->rx.node;
  ixge_rx_from_hw_descriptor_t * bd;
  ixge_rx_to_hw_descriptor_t * ad;
  u32 * b, n_left, is_sop, next_index_sop;

  n_left = n_descriptors;
  b = before_buffers;
  bd = &before_descriptors->rx_from_hw;
  ad = &after_descriptors->rx_to_hw;
  is_sop = dq->rx.is_start_of_packet;
  next_index_sop = dq->rx.saved_start_of_packet_next_index;

  while (n_left >= 2)
    {
      u32 bi0, bi1, flags0, flags1;
      vlib_buffer_t * b0, * b1;
      ixge_rx_dma_trace_t * t0, * t1;
      u8 next0, error0, next1, error1;

      bi0 = b[0];
      bi1 = b[1];
      n_left -= 2;

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      ixge_rx_next_and_error_from_status_x2 (xd,
                                             bd[0].status[0], bd[0].status[2],
                                             bd[1].status[0], bd[1].status[2],
                                             &next0, &error0, &flags0,
                                             &next1, &error1, &flags1);

      next_index_sop = is_sop ? next0 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      next_index_sop = is_sop ? next1 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0);
      t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
      t1->is_start_of_packet = is_sop;
      is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      t0->queue_index = dq->queue_index;
      t1->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t1->device_index = xd->device_index;
      t0->before.rx_from_hw = bd[0];
      t1->before.rx_from_hw = bd[1];
      t0->after.rx_to_hw = ad[0];
      t1->after.rx_to_hw = ad[1];
      t0->buffer_index = bi0;
      t1->buffer_index = bi1;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
              sizeof (t0->buffer.pre_data));
      memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
              sizeof (t1->buffer.pre_data));

      b += 2;
      bd += 2;
      ad += 2;
    }
  while (n_left >= 1)
    {
      u32 bi0, flags0;
      vlib_buffer_t * b0;
      ixge_rx_dma_trace_t * t0;
      u8 next0, error0;

      bi0 = b[0];
      n_left -= 1;

      b0 = vlib_get_buffer (vm, bi0);
      ixge_rx_next_and_error_from_status_x1 (xd,
                                             bd[0].status[0], bd[0].status[2],
                                             &next0, &error0, &flags0);

      next_index_sop = is_sop ? next0 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      t0->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t0->before.rx_from_hw = bd[0];
      t0->after.rx_to_hw = ad[0];
      t0->buffer_index = bi0;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
              sizeof (t0->buffer.pre_data));

      b += 1;
      bd += 1;
      ad += 1;
    }
}
typedef struct {
  ixge_tx_descriptor_t descriptor;
  u32 buffer_index;
  u16 device_index;
  u8 queue_index;
  u8 is_start_of_packet;
  /* Copy of VLIB buffer; packet data stored in pre_data. */
  vlib_buffer_t buffer;
} ixge_tx_dma_trace_t;
static u8 * format_ixge_tx_dma_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  ixge_tx_dma_trace_t * t = va_arg (*va, ixge_tx_dma_trace_t *);
  vnet_main_t * vnm = vnet_get_main();
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, t->device_index);
  format_function_t * f;
  uword indent = format_get_indent (s);

  {
    vnet_sw_interface_t * sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
    s = format (s, "%U tx queue %d",
                format_vnet_sw_interface_name, vnm, sw,
                t->queue_index);
  }

  s = format (s, "\n%Udescriptor: %U",
              format_white_space, indent,
              format_ixge_tx_descriptor, &t->descriptor);

  s = format (s, "\n%Ubuffer 0x%x: %U",
              format_white_space, indent,
              t->buffer_index,
              format_vlib_buffer, &t->buffer);

  s = format (s, "\n%U",
              format_white_space, indent);

  f = format_ethernet_header_with_length;
  if (! f || ! t->is_start_of_packet)
    f = format_hex_bytes;
  s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));

  return s;
}
typedef struct {
  vlib_node_runtime_t * node;
  u32 is_start_of_packet;
  u32 n_bytes_in_packet;
  ixge_tx_descriptor_t * start_of_packet_descriptor;
} ixge_tx_state_t;
static void
ixge_tx_trace (ixge_main_t * xm,
               ixge_device_t * xd,
               ixge_dma_queue_t * dq,
               ixge_tx_state_t * tx_state,
               ixge_tx_descriptor_t * descriptors,
               u32 * buffers,
               uword n_descriptors)
{
  vlib_main_t * vm = xm->vlib_main;
  vlib_node_runtime_t * node = tx_state->node;
  ixge_tx_descriptor_t * d;
  u32 * b, n_left, is_sop;

  n_left = n_descriptors;
  b = buffers;
  d = descriptors;
  is_sop = tx_state->is_start_of_packet;

  while (n_left >= 2)
    {
      u32 bi0, bi1;
      vlib_buffer_t * b0, * b1;
      ixge_tx_dma_trace_t * t0, * t1;

      bi0 = b[0];
      bi1 = b[1];
      n_left -= 2;

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
      t1->is_start_of_packet = is_sop;
      is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      t0->queue_index = dq->queue_index;
      t1->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t1->device_index = xd->device_index;
      t0->descriptor = d[0];
      t1->descriptor = d[1];
      t0->buffer_index = bi0;
      t1->buffer_index = bi1;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
              sizeof (t0->buffer.pre_data));
      memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
              sizeof (t1->buffer.pre_data));

      b += 2;
      d += 2;
    }
  while (n_left >= 1)
    {
      u32 bi0;
      vlib_buffer_t * b0;
      ixge_tx_dma_trace_t * t0;

      bi0 = b[0];
      n_left -= 1;
      b0 = vlib_get_buffer (vm, bi0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t0->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t0->descriptor = d[0];
      t0->buffer_index = bi0;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
              sizeof (t0->buffer.pre_data));
      b += 1;
      d += 1;
    }
}
always_inline uword ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1)
{
  i32 d = i1 - i0;
  ASSERT (i0 < q->n_descriptors);
  ASSERT (i1 < q->n_descriptors);
  return d < 0 ? q->n_descriptors + d : d;
}

always_inline uword ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1)
{
  u32 d = i0 + i1;
  ASSERT (i0 < q->n_descriptors);
  ASSERT (i1 < q->n_descriptors);
  d -= d >= q->n_descriptors ? q->n_descriptors : 0;
  return d;
}
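/* Worked example: with q->n_descriptors == 512, head == 510 and tail == 4,
   ixge_ring_sub (q, 510, 4) == 6 descriptors outstanding across the wrap,
   and ixge_ring_add (q, 510, 4) == 2.  Both helpers require their index
   arguments to already be in [0, n_descriptors). */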
always_inline uword
ixge_tx_descriptor_matches_template (ixge_main_t * xm, ixge_tx_descriptor_t * d)
{
  if ((d->status0 & xm->tx_descriptor_template_mask.status0) ^ xm->tx_descriptor_template.status0)
    return 0;
  if ((d->status1 & xm->tx_descriptor_template_mask.status1) ^ xm->tx_descriptor_template.status1)
    return 0;
  return 1;
}
static uword
ixge_tx_no_wrap (ixge_main_t * xm,
                 ixge_device_t * xd,
                 ixge_dma_queue_t * dq,
                 u32 * buffers,
                 u32 start_descriptor_index,
                 u32 n_descriptors,
                 ixge_tx_state_t * tx_state)
{
  vlib_main_t * vm = xm->vlib_main;
  ixge_tx_descriptor_t * d, * d_sop;
  u32 n_left = n_descriptors;
  u32 * to_free = vec_end (xm->tx_buffers_pending_free);
  u32 * to_tx = vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
  u32 is_sop = tx_state->is_start_of_packet;
  u32 len_sop = tx_state->n_bytes_in_packet;
  u16 template_status = xm->tx_descriptor_template.status0;
  u32 descriptor_prefetch_rotor = 0;

  ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
  d = &dq->descriptors[start_descriptor_index].tx;
  d_sop = is_sop ? d : tx_state->start_of_packet_descriptor;

  while (n_left >= 4)
    {
      vlib_buffer_t * b0, * b1;
      u32 bi0, fi0, len0;
      u32 bi1, fi1, len1;
      u8 is_eop0, is_eop1;

      /* Prefetch next iteration. */
      vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
      vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);

      if ((descriptor_prefetch_rotor & 0x3) == 0)
        CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE);

      descriptor_prefetch_rotor += 2;

      bi0 = buffers[0];
      bi1 = buffers[1];

      to_free[0] = fi0 = to_tx[0];
      to_tx[0] = bi0;
      to_free += fi0 != 0;

      to_free[0] = fi1 = to_tx[1];
      to_tx[1] = bi1;
      to_free += fi1 != 0;

      buffers += 2;
      n_left -= 2;
      to_tx += 2;

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      len0 = b0->current_length;
      len1 = b1->current_length;

      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));
      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1));

      d[0].buffer_address = vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;
      d[1].buffer_address = vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data;

      d[0].n_bytes_this_buffer = len0;
      d[1].n_bytes_this_buffer = len1;

      d[0].status0 = template_status | (is_eop0 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
      d[1].status0 = template_status | (is_eop1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);

      len_sop = (is_sop ? 0 : len_sop) + len0;
      d_sop[0].status1 = IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d += 1;
      d_sop = is_eop0 ? d : d_sop;
      is_sop = is_eop0;

      len_sop = (is_sop ? 0 : len_sop) + len1;
      d_sop[0].status1 = IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d += 1;
      d_sop = is_eop1 ? d : d_sop;
      is_sop = is_eop1;
    }

  while (n_left > 0)
    {
      vlib_buffer_t * b0;
      u32 bi0, fi0, len0;
      u8 is_eop0;

      bi0 = buffers[0];

      to_free[0] = fi0 = to_tx[0];
      to_tx[0] = bi0;
      to_free += fi0 != 0;

      buffers += 1;
      n_left -= 1;
      to_tx += 1;

      b0 = vlib_get_buffer (vm, bi0);

      is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;

      len0 = b0->current_length;

      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));

      d[0].buffer_address = vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;

      d[0].n_bytes_this_buffer = len0;

      d[0].status0 = template_status | (is_eop0 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);

      len_sop = (is_sop ? 0 : len_sop) + len0;
      d_sop[0].status1 = IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d += 1;
      d_sop = is_eop0 ? d : d_sop;
      is_sop = is_eop0;
    }

  if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE)
    {
      to_tx = vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
      ixge_tx_trace (xm, xd, dq, tx_state,
                     &dq->descriptors[start_descriptor_index].tx,
                     to_tx,
                     n_descriptors);
    }

  _vec_len (xm->tx_buffers_pending_free) = to_free - xm->tx_buffers_pending_free;

  /* When we are done d_sop can point to end of ring.  Wrap it if so. */
  {
    ixge_tx_descriptor_t * d_start = &dq->descriptors[0].tx;

    ASSERT (d_sop - d_start <= dq->n_descriptors);
    d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop;
  }

  tx_state->is_start_of_packet = is_sop;
  tx_state->start_of_packet_descriptor = d_sop;
  tx_state->n_bytes_in_packet = len_sop;

  return n_descriptors;
}
static uword
ixge_interface_tx (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * f)
{
  ixge_main_t * xm = &ixge_main;
  vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, rd->dev_instance);
  ixge_dma_queue_t * dq;
  u32 * from, n_left_tx, n_descriptors_to_tx, n_tail_drop;
  u32 queue_index = 0;          /* fixme parameter */
  ixge_tx_state_t tx_state;

  tx_state.node = node;
  tx_state.is_start_of_packet = 1;
  tx_state.start_of_packet_descriptor = 0;
  tx_state.n_bytes_in_packet = 0;

  from = vlib_frame_vector_args (f);

  dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);

  dq->head_index = dq->tx.head_index_write_back[0];

  /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. */
  n_left_tx = dq->n_descriptors - 1;
  n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index);
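  /* Worked example: with a 512-entry ring, head == 10 and tail == 8,
     ixge_ring_sub (dq, 10, 8) == 510 descriptors are still owned by
     hardware, leaving n_left_tx == 511 - 510 == 1 free slot; head == tail
     would mean all 511 usable slots are free. */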
  _vec_len (xm->tx_buffers_pending_free) = 0;

  n_descriptors_to_tx = f->n_vectors;
  n_tail_drop = 0;
  if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx))
    {
      i32 i, n_ok, i_eop, i_sop;

      i_sop = i_eop = ~0;
      for (i = n_left_tx - 1; i >= 0; i--)
        {
          vlib_buffer_t * b = vlib_get_buffer (vm, from[i]);
          if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
            {
              if (i_sop != ~0 && i_eop != ~0)
                break;
              i_eop = i;
              i_sop = i + 1;
            }
        }
      if (i == 0)
        n_ok = 0;
      else
        n_ok = i_eop + 1;

      {
        ELOG_TYPE_DECLARE (e) = {
          .function = (char *) __FUNCTION__,
          .format = "ixge %d, ring full to tx %d head %d tail %d",
          .format_args = "i2i2i2i2",
        };
        struct { u16 instance, to_tx, head, tail; } * ed;
        ed = ELOG_DATA (&vm->elog_main, e);
        ed->instance = xd->device_index;
        ed->to_tx = n_descriptors_to_tx;
        ed->head = dq->head_index;
        ed->tail = dq->tail_index;
      }

      if (n_ok < n_descriptors_to_tx)
        {
          n_tail_drop = n_descriptors_to_tx - n_ok;
          vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop);
          vlib_error_count (vm, ixge_input_node.index, IXGE_ERROR_tx_full_drops, n_tail_drop);
        }

      n_descriptors_to_tx = n_ok;
    }

  dq->tx.n_buffers_on_ring += n_descriptors_to_tx;

  /* Process from tail to end of descriptor ring. */
  if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors)
    {
      u32 n = clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx);
      n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state);
      from += n;
      n_descriptors_to_tx -= n;
      dq->tail_index += n;
      ASSERT (dq->tail_index <= dq->n_descriptors);
      if (dq->tail_index == dq->n_descriptors)
        dq->tail_index = 0;
    }

  if (n_descriptors_to_tx > 0)
    {
      u32 n = ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state);
      from += n;
      ASSERT (n == n_descriptors_to_tx);
      dq->tail_index += n;
      ASSERT (dq->tail_index <= dq->n_descriptors);
      if (dq->tail_index == dq->n_descriptors)
        dq->tail_index = 0;
    }

  /* We should only get full packets. */
  ASSERT (tx_state.is_start_of_packet);

  /* Report status when last descriptor is done. */
  {
    u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1;
    ixge_tx_descriptor_t * d = &dq->descriptors[i].tx;
    d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS;
  }

  /* Give new descriptors to hardware. */
  {
    ixge_dma_regs_t * dr = get_dma_regs (xd, VLIB_TX, queue_index);

    CLIB_MEMORY_BARRIER ();

    dr->tail_index = dq->tail_index;
  }

  /* Free any buffers that are done. */
  {
    u32 n = _vec_len (xm->tx_buffers_pending_free);
    if (n > 0)
      {
        vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n);
        _vec_len (xm->tx_buffers_pending_free) = 0;
        ASSERT (dq->tx.n_buffers_on_ring >= n);
        dq->tx.n_buffers_on_ring -= (n - n_tail_drop);
      }
  }

  return f->n_vectors;
}
static uword
ixge_rx_queue_no_wrap (ixge_main_t * xm,
                       ixge_device_t * xd,
                       ixge_dma_queue_t * dq,
                       u32 start_descriptor_index,
                       u32 n_descriptors)
{
  vlib_main_t * vm = xm->vlib_main;
  vlib_node_runtime_t * node = dq->rx.node;
  ixge_descriptor_t * d;
  static ixge_descriptor_t * d_trace_save;
  static u32 * d_trace_buffers;
  u32 n_descriptors_left = n_descriptors;
  u32 * to_rx = vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
  u32 * to_add;
  u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index;
  u32 bi_last = dq->rx.saved_last_buffer_index;
  u32 next_index_sop = dq->rx.saved_start_of_packet_next_index;
  u32 is_sop = dq->rx.is_start_of_packet;
  u32 next_index, n_left_to_next, * to_next;
  u32 n_packets = 0;
  u32 n_bytes = 0;
  u32 n_trace = vlib_get_trace_count (vm, node);
  vlib_buffer_t * b_last, b_dummy;

  ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
  d = &dq->descriptors[start_descriptor_index];

  b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy;
  next_index = dq->rx.next_index;
  if (n_trace > 0)
    {
      u32 n = clib_min (n_trace, n_descriptors);
      if (d_trace_save)
        {
          _vec_len (d_trace_save) = 0;
          _vec_len (d_trace_buffers) = 0;
        }
      vec_add (d_trace_save, (ixge_descriptor_t *) d, n);
      vec_add (d_trace_buffers, to_rx, n);
    }

  {
    uword l = vec_len (xm->rx_buffers_to_add);

    if (l < n_descriptors_left)
      {
        u32 n_to_alloc = 2 * dq->n_descriptors - l;
        u32 n_allocated;

        vec_resize (xm->rx_buffers_to_add, n_to_alloc);

        _vec_len (xm->rx_buffers_to_add) = l;
        n_allocated = vlib_buffer_alloc_from_free_list
          (vm, xm->rx_buffers_to_add + l, n_to_alloc,
           xm->vlib_buffer_free_list_index);
        _vec_len (xm->rx_buffers_to_add) += n_allocated;

        /* Handle transient allocation failure. */
        if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left))
          {
            if (n_allocated == 0)
              vlib_error_count (vm, ixge_input_node.index,
                                IXGE_ERROR_rx_alloc_no_physmem, 1);
            else
              vlib_error_count (vm, ixge_input_node.index,
                                IXGE_ERROR_rx_alloc_fail, 1);

            n_descriptors_left = l + n_allocated;
          }
        n_descriptors = n_descriptors_left;
      }

    /* Add buffers from end of vector going backwards. */
    to_add = vec_end (xm->rx_buffers_to_add) - 1;
  }
  while (n_descriptors_left > 0)
    {
      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      while (n_descriptors_left >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t * b0, * b1;
          u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
          u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1;
          u8 is_eop0, error0, next0;
          u8 is_eop1, error1, next1;
          ixge_descriptor_t d0, d1;

          vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE);
          vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE);

          CLIB_PREFETCH (d + 2, 32, STORE);

          d0.as_u32x4 = d[0].as_u32x4;
          d1.as_u32x4 = d[1].as_u32x4;

          s20 = d0.rx_from_hw.status[2];
          s21 = d1.rx_from_hw.status[2];

          s00 = d0.rx_from_hw.status[0];
          s01 = d1.rx_from_hw.status[0];

          if (! ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
            goto found_hw_owned_descriptor_x2;

          bi0 = to_rx[0];
          bi1 = to_rx[1];

          ASSERT (to_add - 1 >= xm->rx_buffers_to_add);
          fi0 = to_add[0];
          fi1 = to_add[-1];

          to_rx[0] = fi0;
          to_rx[1] = fi1;
          to_rx += 2;
          to_add -= 2;

          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, bi0));
          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, bi1));
          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, fi0));
          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, fi1));

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /*
           * Turn this on if you run into
           * "bad monkey" contexts, and you want to know exactly
           * which nodes they've visited... See main.c...
           */
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0);
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b1);

          CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD);
          CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD);

          is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
          is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;

          ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21,
                                                 &next0, &error0, &flags0,
                                                 &next1, &error1, &flags1);

          next0 = is_sop ? next0 : next_index_sop;
          next1 = is_eop0 ? next1 : next0;
          next_index_sop = next1;

          b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
          b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
          vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;
          vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32)~0;

          b0->error = node->errors[error0];
          b1->error = node->errors[error1];

          len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
          len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor;
          n_bytes += len0 + len1;
          n_packets += is_eop0 + is_eop1;

          /* Give new buffers to hardware. */
          d0.rx_to_hw.tail_address =
            vlib_get_buffer_data_physical_address (vm, fi0);
          d1.rx_to_hw.tail_address =
            vlib_get_buffer_data_physical_address (vm, fi1);
          d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address;
          d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address;
          d[0].as_u32x4 = d0.as_u32x4;
          d[1].as_u32x4 = d1.as_u32x4;

          d += 2;
          n_descriptors_left -= 2;

          /* Point to either l2 or l3 header depending on next. */
          l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
            ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00)
            : 0;
          l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT))
            ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01)
            : 0;

          b0->current_length = len0 - l3_offset0;
          b1->current_length = len1 - l3_offset1;
          b0->current_data = l3_offset0;
          b1->current_data = l3_offset1;

          b_last->next_buffer = is_sop ? ~0 : bi0;
          b0->next_buffer = is_eop0 ? ~0 : bi1;
          bi_last = bi1;
          b_last = b1;

          if (CLIB_DEBUG > 0)
            {
              u32 bi_sop0 = is_sop ? bi0 : bi_sop;
              u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;

              if (is_eop0)
                {
                  u8 * msg = vlib_validate_buffer (vm, bi_sop0, /* follow_buffer_next */ 1);
                  ASSERT (! msg);
                }
              if (is_eop1)
                {
                  u8 * msg = vlib_validate_buffer (vm, bi_sop1, /* follow_buffer_next */ 1);
                  ASSERT (! msg);
                }
            }

          if (0) /* "Dave" version */
            {
              u32 bi_sop0 = is_sop ? bi0 : bi_sop;
              u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;

              if (is_eop0)
                {
                  to_next[0] = bi_sop0;
                  to_next++;
                  n_left_to_next--;

                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next,
                                                   bi_sop0, next0);
                }
              if (is_eop1)
                {
                  to_next[0] = bi_sop1;
                  to_next++;
                  n_left_to_next--;

                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next,
                                                   bi_sop1, next1);
                }
              is_sop = is_eop1;
              bi_sop = bi_sop1;
            }
          if (1) /* "Eliot" version */
            {
              /* Speculatively enqueue to cached next. */
              u8 saved_is_sop = is_sop;
              u32 bi_sop_save = bi_sop;

              bi_sop = saved_is_sop ? bi0 : bi_sop;
              to_next[0] = bi_sop;
              to_next += is_eop0;
              n_left_to_next -= is_eop0;

              bi_sop = is_eop0 ? bi1 : bi_sop;
              to_next[0] = bi_sop;
              to_next += is_eop1;
              n_left_to_next -= is_eop1;

              is_sop = is_eop1;

              if (PREDICT_FALSE (! (next0 == next_index && next1 == next_index)))
                {
                  /* Undo speculation. */
                  to_next -= is_eop0 + is_eop1;
                  n_left_to_next += is_eop0 + is_eop1;

                  /* Re-do both descriptors being careful about where we enqueue. */
                  bi_sop = saved_is_sop ? bi0 : bi_sop_save;
                  if (is_eop0)
                    {
                      if (next0 != next_index)
                        vlib_set_next_frame_buffer (vm, node, next0, bi_sop);
                      else
                        {
                          to_next[0] = bi_sop;
                          to_next += 1;
                          n_left_to_next -= 1;
                        }
                    }

                  bi_sop = is_eop0 ? bi1 : bi_sop;
                  if (is_eop1)
                    {
                      if (next1 != next_index)
                        vlib_set_next_frame_buffer (vm, node, next1, bi_sop);
                      else
                        {
                          to_next[0] = bi_sop;
                          to_next += 1;
                          n_left_to_next -= 1;
                        }
                    }

                  /* Switch cached next index when next for both packets is the same. */
                  if (is_eop0 && is_eop1 && next0 == next1)
                    {
                      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
                      next_index = next0;
                      vlib_get_next_frame (vm, node, next_index,
                                           to_next, n_left_to_next);
                    }
                }
            }
        }
      /* Bail out of dual loop and proceed with single loop. */
    found_hw_owned_descriptor_x2:
      while (n_descriptors_left > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t * b0;
          u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
          u8 is_eop0, error0, next0;
          ixge_descriptor_t d0;

          d0.as_u32x4 = d[0].as_u32x4;

          s20 = d0.rx_from_hw.status[2];
          s00 = d0.rx_from_hw.status[0];

          if (! (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
            goto found_hw_owned_descriptor_x1;

          bi0 = to_rx[0];
          ASSERT (to_add >= xm->rx_buffers_to_add);
          fi0 = to_add[0];

          to_rx[0] = fi0;
          to_rx += 1;
          to_add -= 1;

          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, bi0));
          ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (vm, fi0));

          b0 = vlib_get_buffer (vm, bi0);

          /*
           * Turn this on if you run into
           * "bad monkey" contexts, and you want to know exactly
           * which nodes they've visited...
           */
          VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b0);

          is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
          ixge_rx_next_and_error_from_status_x1
            (xd, s00, s20, &next0, &error0, &flags0);

          next0 = is_sop ? next0 : next_index_sop;
          next_index_sop = next0;

          b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);

          vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
          vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;

          b0->error = node->errors[error0];

          len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
          n_bytes += len0;
          n_packets += is_eop0;

          /* Give new buffer to hardware. */
          d0.rx_to_hw.tail_address =
            vlib_get_buffer_data_physical_address (vm, fi0);
          d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address;
          d[0].as_u32x4 = d0.as_u32x4;

          d += 1;
          n_descriptors_left -= 1;

          /* Point to either l2 or l3 header depending on next. */
          l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
            ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00)
            : 0;
          b0->current_length = len0 - l3_offset0;
          b0->current_data = l3_offset0;

          b_last->next_buffer = is_sop ? ~0 : bi0;
          bi_last = bi0;
          b_last = b0;

          bi_sop = is_sop ? bi0 : bi_sop;

          if (CLIB_DEBUG > 0 && is_eop0)
            {
              u8 * msg = vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1);
              ASSERT (! msg);
            }

          if (0) /* "Dave" version */
            {
              if (is_eop0)
                {
                  to_next[0] = bi_sop;
                  to_next++;
                  n_left_to_next--;

                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next,
                                                   bi_sop, next0);
                }
            }
          if (1) /* "Eliot" version */
            {
              if (PREDICT_TRUE (next0 == next_index))
                {
                  to_next[0] = bi_sop;
                  to_next += is_eop0;
                  n_left_to_next -= is_eop0;
                }
              else
                {
                  if (next0 != next_index && is_eop0)
                    vlib_set_next_frame_buffer (vm, node, next0, bi_sop);

                  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
                  next_index = next0;
                  vlib_get_next_frame (vm, node, next_index,
                                       to_next, n_left_to_next);
                }
            }
          is_sop = is_eop0;
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
 found_hw_owned_descriptor_x1:
  if (n_descriptors_left > 0)
    vlib_put_next_frame (vm, node, next_index, n_left_to_next);

  _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add;

  {
    u32 n_done = n_descriptors - n_descriptors_left;

    if (n_trace > 0 && n_done > 0)
      {
        u32 n = clib_min (n_trace, n_done);
        ixge_rx_trace (xm, xd, dq,
                       d_trace_save,
                       d_trace_buffers,
                       &dq->descriptors[start_descriptor_index],
                       n);
        vlib_set_trace_count (vm, node, n_trace - n);
      }
    if (d_trace_save)
      {
        _vec_len (d_trace_save) = 0;
        _vec_len (d_trace_buffers) = 0;
      }

    /* Don't keep a reference to b_last if we don't have to.
       Otherwise we can over-write a next_buffer pointer after having already
       enqueued a packet. */
    if (is_sop)
      {
        b_last->next_buffer = ~0;
        bi_last = ~0;
      }

    dq->rx.n_descriptors_done_this_call = n_done;
    dq->rx.n_descriptors_done_total += n_done;
    dq->rx.is_start_of_packet = is_sop;
    dq->rx.saved_start_of_packet_buffer_index = bi_sop;
    dq->rx.saved_last_buffer_index = bi_last;
    dq->rx.saved_start_of_packet_next_index = next_index_sop;
    dq->rx.next_index = next_index;
    dq->rx.n_bytes += n_bytes;

    return n_packets;
  }
}
static uword
ixge_rx_queue (ixge_main_t * xm,
               ixge_device_t * xd,
               vlib_node_runtime_t * node,
               u32 queue_index)
{
  ixge_dma_queue_t * dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index);
  ixge_dma_regs_t * dr = get_dma_regs (xd, VLIB_RX, dq->queue_index);
  uword n_packets = 0;
  u32 hw_head_index, sw_head_index;

  /* One time initialization. */
  if (! dq->rx.node)
    {
      dq->rx.node = node;
      dq->rx.is_start_of_packet = 1;
      dq->rx.saved_start_of_packet_buffer_index = ~0;
      dq->rx.saved_last_buffer_index = ~0;
    }

  dq->rx.next_index = node->cached_next_index;

  dq->rx.n_descriptors_done_total = 0;
  dq->rx.n_descriptors_done_this_call = 0;
  dq->rx.n_bytes = 0;

  /* Fetch head from hardware and compare to where we think we are. */
  hw_head_index = dr->head_index;
  sw_head_index = dq->head_index;

  if (hw_head_index == sw_head_index)
    goto done;

  if (hw_head_index < sw_head_index)
    {
      u32 n_tried = dq->n_descriptors - sw_head_index;
      n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
      sw_head_index = ixge_ring_add (dq, sw_head_index, dq->rx.n_descriptors_done_this_call);

      if (dq->rx.n_descriptors_done_this_call != n_tried)
        goto done;
    }
  if (hw_head_index >= sw_head_index)
    {
      u32 n_tried = hw_head_index - sw_head_index;
      n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
      sw_head_index = ixge_ring_add (dq, sw_head_index, dq->rx.n_descriptors_done_this_call);
    }

 done:
  dq->head_index = sw_head_index;
  dq->tail_index = ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total);

  /* Give tail back to hardware. */
  CLIB_MEMORY_BARRIER ();

  dr->tail_index = dq->tail_index;

  vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters
                                   + VNET_INTERFACE_COUNTER_RX,
                                   0 /* cpu_index */,
                                   xd->vlib_sw_if_index,
                                   n_packets,
                                   dq->rx.n_bytes);

  return n_packets;
}
static void ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i)
{
  vlib_main_t * vm = xm->vlib_main;
  ixge_regs_t * r = xd->regs;

  if (i != 20)
    {
      ELOG_TYPE_DECLARE (e) = {
        .function = (char *) __FUNCTION__,
        .format = "ixge %d, %s",
        .format_args = "i1t1",
        .n_enum_strings = 16,
        .enum_strings = {
          "flow director",
          "rx miss",
          "pci exception",
          "mailbox",
          "link status change",
          "linksec key exchange",
          "manageability event",
          "reserved23",
          "sdp0", "sdp1", "sdp2", "sdp3",
          "ecc",
          "descriptor handler error",
          "tcp timer",
          "other",
        },
      };
      struct { u8 instance; u8 index; } * ed;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      ed->index = i - 16;
    }
  else
    {
      u32 v = r->xge_mac.link_status;
      uword is_up = (v & (1 << 30)) != 0;

      ELOG_TYPE_DECLARE (e) = {
        .function = (char *) __FUNCTION__,
        .format = "ixge %d, link status change 0x%x",
        .format_args = "i4i4",
      };
      struct { u32 instance, link_status; } * ed;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      ed->link_status = v;
      xd->link_status_at_last_link_change = v;

      vlib_process_signal_event (vm, ixge_process_node.index,
                                 EVENT_SET_FLAGS,
                                 ((is_up << 31) | xd->vlib_hw_if_index));
    }
}
static uword
clean_block (u32 * b, u32 * t, u32 n_left)
{
  u32 * t0 = t;

  while (n_left >= 4)
    {
      u32 bi0, bi1, bi2, bi3;

      t[0] = bi0 = b[0]; b[0] = 0; t += bi0 != 0;
      t[0] = bi1 = b[1]; b[1] = 0; t += bi1 != 0;
      t[0] = bi2 = b[2]; b[2] = 0; t += bi2 != 0;
      t[0] = bi3 = b[3]; b[3] = 0; t += bi3 != 0;

      b += 4;
      n_left -= 4;
    }
  while (n_left > 0)
    {
      u32 bi0;
      t[0] = bi0 = b[0]; b[0] = 0; t += bi0 != 0;
      b += 1;
      n_left -= 1;
    }
  return t - t0;
}
static void
ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index)
{
  vlib_main_t * vm = xm->vlib_main;
  ixge_dma_queue_t * dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);
  u32 n_clean, * b, * t, * t0;
  i32 n_hw_owned_descriptors;
  i32 first_to_clean, last_to_clean;
  u64 hwbp_race = 0;

  /* Handle case where head write back pointer update
   * arrives after the interrupt during high PCI bus loads.
   */
  while ((dq->head_index == dq->tx.head_index_write_back[0]) &&
         dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index))
    {
      hwbp_race++;
      if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1))
        {
          ELOG_TYPE_DECLARE (e) = {
            .function = (char *) __FUNCTION__,
            .format = "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",
            .format_args = "i4i4i4i4",
          };
          struct { u32 instance, head_index, tail_index, n_buffers_on_ring; } * ed;
          ed = ELOG_DATA (&vm->elog_main, e);
          ed->instance = xd->device_index;
          ed->head_index = dq->head_index;
          ed->tail_index = dq->tail_index;
          ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring;
        }
    }

  dq->head_index = dq->tx.head_index_write_back[0];
  n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index);
  ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors);
  n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors;

  if (IXGE_HWBP_RACE_ELOG && hwbp_race)
    {
      ELOG_TYPE_DECLARE (e) = {
        .function = (char *) __FUNCTION__,
        .format = "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",
        .format_args = "i4i4i4i4i4",
      };
      struct { u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; } * ed;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      ed->head_index = dq->head_index;
      ed->n_hw_owned_descriptors = n_hw_owned_descriptors;
      ed->n_clean = n_clean;
      ed->retries = hwbp_race;
    }

  /*
   * This function used to wait until hardware owned zero descriptors.
   * At high PPS rates, that doesn't happen until the TX ring is
   * completely full of descriptors which need to be cleaned up.
   * That, in turn, causes TX ring-full drops and/or long RX service
   * interruptions.
   */
  if (n_clean == 0)
    return;

  /* Clean the n_clean descriptors prior to the reported hardware head */
  last_to_clean = dq->head_index - 1;
  last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors :
    last_to_clean;

  first_to_clean = (last_to_clean) - (n_clean - 1);
  first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors :
    first_to_clean;

  vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
  t0 = t = xm->tx_buffers_pending_free;
  b = dq->descriptor_buffer_indices + first_to_clean;

  /* Wrap case: clean from first to end, then start to last */
  if (first_to_clean > last_to_clean)
    {
      t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean);
      first_to_clean = 0;
      b = dq->descriptor_buffer_indices;
    }

  /* Typical case: clean from first to last */
  if (first_to_clean <= last_to_clean)
    t += clean_block (b, t, (last_to_clean - first_to_clean) + 1);

  if (t > t0)
    {
      u32 n = t - t0;
      vlib_buffer_free_no_next (vm, t0, n);
      ASSERT (dq->tx.n_buffers_on_ring >= n);
      dq->tx.n_buffers_on_ring -= n;
      _vec_len (xm->tx_buffers_pending_free) = 0;
    }
}
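/* Worked example: with head_index == 500 and n_clean == 4, last_to_clean
   becomes 499 and first_to_clean 496, so only the typical branch runs and
   hands descriptors 496..499 to clean_block; when first_to_clean underflows
   below zero it is re-based by n_descriptors and the wrap branch cleans the
   tail of the ring first. */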
/* RX queue interrupts 0 thru 7; TX 8 thru 15. */
always_inline uword ixge_interrupt_is_rx_queue (uword i)
{ return i < 8; }

always_inline uword ixge_interrupt_is_tx_queue (uword i)
{ return i >= 8 && i < 16; }

always_inline uword ixge_tx_queue_to_interrupt (uword i)
{ return 8 + i; }

always_inline uword ixge_rx_queue_to_interrupt (uword i)
{ return 0 + i; }

always_inline uword ixge_interrupt_rx_queue (uword i)
{
  ASSERT (ixge_interrupt_is_rx_queue (i));
  return i - 0;
}

always_inline uword ixge_interrupt_tx_queue (uword i)
{
  ASSERT (ixge_interrupt_is_tx_queue (i));
  return i - 8;
}
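/* Example: interrupt bit 3 is rx queue 3 (ixge_interrupt_rx_queue (3) == 3),
   interrupt bit 11 is tx queue 3 (ixge_interrupt_tx_queue (11) == 3), and
   bits 16 and above fall through to ixge_interrupt () below. */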
static uword
ixge_device_input (ixge_main_t * xm,
                   ixge_device_t * xd,
                   vlib_node_runtime_t * node)
{
  ixge_regs_t * r = xd->regs;
  u32 i, s;
  uword n_rx_packets = 0;

  s = r->interrupt.status_write_1_to_set;
  if (s)
    r->interrupt.status_write_1_to_clear = s;

  foreach_set_bit (i, s, ({
    if (ixge_interrupt_is_rx_queue (i))
      n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i));

    else if (ixge_interrupt_is_tx_queue (i))
      ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i));

    else
      ixge_interrupt (xm, xd, i);
  }));

  return n_rx_packets;
}
static uword
ixge_input (vlib_main_t * vm,
            vlib_node_runtime_t * node,
            vlib_frame_t * f)
{
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd;
  uword n_rx_packets = 0;

  if (node->state == VLIB_NODE_STATE_INTERRUPT)
    {
      uword i;

      /* Loop over devices with interrupts. */
      foreach_set_bit (i, node->runtime_data[0], ({
        xd = vec_elt_at_index (xm->devices, i);
        n_rx_packets += ixge_device_input (xm, xd, node);

        /* Re-enable interrupts since we're going to stay in interrupt mode. */
        if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
          xd->regs->interrupt.enable_write_1_to_set = ~0;
      }));

      /* Clear mask of devices with pending interrupts. */
      node->runtime_data[0] = 0;
    }
  else
    {
      /* Poll all devices for input/interrupts. */
      vec_foreach (xd, xm->devices)
        {
          n_rx_packets += ixge_device_input (xm, xd, node);

          /* Re-enable interrupts when switching out of polling mode. */
          if (node->flags &
              VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
            xd->regs->interrupt.enable_write_1_to_set = ~0;
        }
    }

  return n_rx_packets;
}
static char * ixge_error_strings[] = {
#define _(n,s) s,
  foreach_ixge_error
#undef _
};
static vlib_node_registration_t ixge_input_node = {
  .function = ixge_input,
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "ixge-input",

  /* Will be enabled if/when hardware is detected. */
  .state = VLIB_NODE_STATE_DISABLED,

  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_ixge_rx_dma_trace,

  .n_errors = IXGE_N_ERROR,
  .error_strings = ixge_error_strings,

  .n_next_nodes = IXGE_RX_N_NEXT,
  .next_nodes = {
    [IXGE_RX_NEXT_DROP] = "error-drop",
    [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
    [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input",
    [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input)
CLIB_MULTIARCH_SELECT_FN (ixge_input)
static u8 * format_ixge_device_name (u8 * s, va_list * args)
{
  u32 i = va_arg (*args, u32);
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, i);
  return format (s, "TenGigabitEthernet%U",
                 format_vlib_pci_handle, &xd->pci_device.bus_address);
}
#define IXGE_COUNTER_IS_64_BIT (1 << 0)
#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1)

static u8 ixge_counter_flags[] = {
#define _(a,f) 0,
#define _64(a,f) IXGE_COUNTER_IS_64_BIT,
  foreach_ixge_counter
#undef _
#undef _64
};
static void ixge_update_counters (ixge_device_t * xd)
{
  /* Byte offset for counter registers. */
  static u32 reg_offsets[] = {
#define _(a,f) (a) / sizeof (u32),
#define _64(a,f) _(a,f)
    foreach_ixge_counter
#undef _
#undef _64
  };
  volatile u32 * r = (volatile u32 *) xd->regs;
  int i;

  for (i = 0; i < ARRAY_LEN (xd->counters); i++)
    {
      u32 o = reg_offsets[i];
      xd->counters[i] += r[o];
      if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ)
        r[o] = 0;
      if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT)
        xd->counters[i] += (u64) r[o+1] << (u64) 32;
    }
}
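/* Example: a 64-bit counter keeps its low 32 bits at reg_offsets[i] and the
   high bits in the next u32, so each pass accumulates lo + ((u64) hi << 32);
   counters flagged NOT_CLEAR_ON_READ are zeroed by software after being
   added, while the rest clear themselves when read. */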
static u8 * format_ixge_device_id (u8 * s, va_list * args)
{
  u32 device_id = va_arg (*args, u32);
  char * t = 0;
  switch (device_id)
    {
#define _(f,n) case n: t = #f; break;
      foreach_ixge_pci_device_id;
#undef _
    default: t = 0; break;
    }
  if (t == 0)
    s = format (s, "unknown 0x%x", device_id);
  else
    s = format (s, "%s", t);
  return s;
}
static u8 * format_ixge_link_status (u8 * s, va_list * args)
{
  ixge_device_t * xd = va_arg (*args, ixge_device_t *);
  u32 v = xd->link_status_at_last_link_change;

  s = format (s, "%s", (v & (1 << 30)) ? "up" : "down");
  {
    char * modes[] = {
      "1g", "10g parallel", "10g serial", "autoneg",
    };
    char * speeds[] = {
      "unknown", "100m", "1g", "10g",
    };
    s = format (s, ", mode %s, speed %s",
                modes[(v >> 26) & 3],
                speeds[(v >> 28) & 3]);
  }
  return s;
}
static u8 * format_ixge_device (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  CLIB_UNUSED (int verbose) = va_arg (*args, int);
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, dev_instance);
  ixge_phy_t * phy = xd->phys + xd->phy_index;
  uword indent = format_get_indent (s);

  ixge_update_counters (xd);
  xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status;

  s = format (s, "Intel 8259X: id %U\n%Ulink %U",
              format_ixge_device_id, xd->device_id,
              format_white_space, indent + 2,
              format_ixge_link_status, xd);

  s = format (s, "\n%UPCIe %U", format_white_space, indent + 2,
              format_vlib_pci_link_speed, &xd->pci_device);

  {
    s = format (s, "\n%U", format_white_space, indent + 2);
    if (phy->mdio_address != ~0)
      s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id);
    else if (xd->sfp_eeprom.id == SFP_ID_sfp)
      s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom);
    else
      s = format (s, "PHY not found");
  }

  {
    ixge_dma_queue_t * dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0);
    ixge_dma_regs_t * dr = get_dma_regs (xd, VLIB_RX, 0);
    u32 hw_head_index = dr->head_index;
    u32 sw_head_index = dq->head_index;
    u32 nitems;

    nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index);
    s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring",
                format_white_space, indent + 2, nitems, dq->n_descriptors);

    s = format (s, "\n%U%d buffers in driver rx cache",
                format_white_space, indent + 2, vec_len (xm->rx_buffers_to_add));

    s = format (s, "\n%U%d buffers on tx queue 0 ring",
                format_white_space, indent + 2,
                xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring);
  }

  {
    u32 i, v;
    static char * names[] = {
#define _(a,f) #f,
#define _64(a,f) _(a,f)
      foreach_ixge_counter
#undef _
#undef _64
    };

    for (i = 0; i < ARRAY_LEN (names); i++)
      {
        v = xd->counters[i] - xd->counters_last_clear[i];
        if (v != 0)
          s = format (s, "\n%U%-40U%16Ld",
                      format_white_space, indent + 2,
                      format_c_identifier, names[i],
                      v);
      }
  }

  return s;
}
static void ixge_clear_hw_interface_counters (u32 instance)
{
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, instance);
  ixge_update_counters (xd);
  memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters));
}
/*
 * Dynamically redirect all pkts from a specific interface
 * to the specified node
 */
static void ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
                                          u32 node_index)
{
  ixge_main_t * xm = &ixge_main;
  vnet_hw_interface_t * hw = vnet_get_hw_interface (vnm, hw_if_index);
  ixge_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance);

  /* Shut off redirection */
  if (node_index == ~0)
    {
      xd->per_interface_next_index = node_index;
      return;
    }

  xd->per_interface_next_index =
    vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index);
}
VNET_DEVICE_CLASS (ixge_device_class) = {
  .name = "ixge",
  .tx_function = ixge_interface_tx,
  .format_device_name = format_ixge_device_name,
  .format_device = format_ixge_device,
  .format_tx_trace = format_ixge_tx_dma_trace,
  .clear_counters = ixge_clear_hw_interface_counters,
  .admin_up_down_function = ixge_interface_admin_up_down,
  .rx_redirect_to_node = ixge_set_interface_next_node,
};
#define IXGE_N_BYTES_IN_RX_BUFFER (2048)  // DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors).
static clib_error_t *
ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index)
{
  ixge_main_t * xm = &ixge_main;
  vlib_main_t * vm = xm->vlib_main;
  ixge_dma_queue_t * dq;
  clib_error_t * error = 0;

  vec_validate (xd->dma_queues[rt], queue_index);
  dq = vec_elt_at_index (xd->dma_queues[rt], queue_index);

  if (! xm->n_descriptors_per_cache_line)
    xm->n_descriptors_per_cache_line = CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]);

  if (! xm->n_bytes_in_rx_buffer)
    xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER;
  /* Hardware sizes rx buffers in 1 kByte units. */
  xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024);
  if (! xm->vlib_buffer_free_list_index)
    {
      xm->vlib_buffer_free_list_index =
	vlib_buffer_get_or_create_free_list (vm, xm->n_bytes_in_rx_buffer, "ixge rx");
      ASSERT (xm->vlib_buffer_free_list_index != 0);
    }

  if (! xm->n_descriptors[rt])
    xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE;

  dq->queue_index = queue_index;
  dq->n_descriptors = round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line);
  dq->head_index = dq->tail_index = 0;

  dq->descriptors = vlib_physmem_alloc_aligned (vm, &error,
						dq->n_descriptors * sizeof (dq->descriptors[0]),
						128 /* per chip spec */);
  if (error)
    return error;

  memset (dq->descriptors, 0, dq->n_descriptors * sizeof (dq->descriptors[0]));
  vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors);

  if (rt == VLIB_RX)
    {
      u32 n_alloc, i;

      n_alloc = vlib_buffer_alloc_from_free_list
	(vm, dq->descriptor_buffer_indices, vec_len (dq->descriptor_buffer_indices),
	 xm->vlib_buffer_free_list_index);
      ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices));
      /* Point each rx descriptor at a buffer the hardware can DMA into. */
      for (i = 0; i < n_alloc; i++)
	{
	  vlib_buffer_t * b = vlib_get_buffer (vm, dq->descriptor_buffer_indices[i]);
	  dq->descriptors[i].rx_to_hw.tail_address =
	    vlib_physmem_virtual_to_physical (vm, b->data);
	}
    }
  else
    {
      u32 i;

      dq->tx.head_index_write_back = vlib_physmem_alloc (vm, &error, CLIB_CACHE_LINE_BYTES);

      for (i = 0; i < dq->n_descriptors; i++)
	dq->descriptors[i].tx = xm->tx_descriptor_template;

      vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
    }
  {
    ixge_dma_regs_t * dr = get_dma_regs (xd, rt, queue_index);
    u64 a;

    a = vlib_physmem_virtual_to_physical (vm, dq->descriptors);
    dr->descriptor_address[0] = a & 0xFFFFFFFF;
    dr->descriptor_address[1] = a >> (u64) 32;
    dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]);
    dq->head_index = dq->tail_index = 0;

    if (rt == VLIB_RX)
      {
	/* Buffer size field is 5 bits of 1 kByte units. */
	ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32);
	dr->rx_split_control =
	  (/* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0)
	   | (/* lo free descriptor threshold (units of 64 descriptors) */
	      (1 << 22))
	   | (/* descriptor type: advanced one buffer */
	      (1 << 25))
	   | (/* drop if no descriptors available */
	      (1 << 28)));

	/* Give hardware all but last 16 cache lines' worth of descriptors. */
	dq->tail_index = dq->n_descriptors -
	  16 * xm->n_descriptors_per_cache_line;
      }
    else
      {
	/* Make sure it's initialized before hardware can get to it. */
	dq->tx.head_index_write_back[0] = dq->head_index;

	a = vlib_physmem_virtual_to_physical (vm, dq->tx.head_index_write_back);
	dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a;
	dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32;
      }

    /* DMA on 82599 does not work with [13] rx data write relaxed ordering
       and [12] undocumented set. */
    if (rt == VLIB_RX)
      dr->dca_control &= ~((1 << 13) | (1 << 12));

    CLIB_MEMORY_BARRIER ();

    if (rt == VLIB_TX)
      {
	xd->regs->tx_dma_control |= (1 << 0);
	dr->control |= ((32 << 0) /* prefetch threshold */
			| (64 << 8) /* host threshold */
			| (0 << 16) /* writeback threshold */);
      }

    /* Enable this queue and wait for hardware to initialize
       before adding to tail. */
    if (rt == VLIB_TX)
      {
	dr->control |= 1 << 25;
	while (! (dr->control & (1 << 25)))
	  ;
      }

    /* Set head/tail indices and enable DMA. */
    dr->head_index = dq->head_index;
    dr->tail_index = dq->tail_index;
  }

  return error;
}
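/* Worked example of the sizing above, assuming VLIB_FRAME_SIZE = 256,
   16 byte descriptors and 64 byte cache lines (typical for this code's
   era): an rx ring defaults to 4 * 256 = 1024 descriptors (16 kBytes of
   physmem, 4 descriptors per cache line), and the initial rx tail of
   1024 - 16 * 4 = 960 leaves 64 descriptors in software hands while the
   hardware owns the other 960 slots. */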
static u32 ixge_flag_change (vnet_main_t * vnm,
			     vnet_hw_interface_t * hw,
			     u32 flags)
{
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance);
  ixge_regs_t * r = xd->regs;
  u32 old = r->filter_control;

  if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
    r->filter_control = old | (1 << 9) /* unicast promiscuous */;
  else
    r->filter_control = old & ~(1 << 9);

  return old;
}
static void ixge_device_init (ixge_main_t * xm)
{
  vnet_main_t * vnm = vnet_get_main();
  ixge_device_t * xd;

  /* Reset chip(s). */
  vec_foreach (xd, xm->devices)
    {
      ixge_regs_t * r = xd->regs;
      const u32 reset_bit = (1 << 26) | (1 << 3);

      r->control |= reset_bit;

      /* No need to suspend. Timed to take ~1e-6 secs */
      while (r->control & reset_bit)
	;

      /* Software loaded. */
      r->extended_control |= (1 << 28);

      ixge_phy_init (xd);

      /* Register ethernet interface. */
      {
	u8 addr8[6];
	u32 i, addr32[2];
	clib_error_t * error;

	/* MAC address is packed little-endian into two 32 bit registers;
	   unpack it a byte at a time. */
	addr32[0] = r->rx_ethernet_address0[0][0];
	addr32[1] = r->rx_ethernet_address0[0][1];
	for (i = 0; i < 6; i++)
	  addr8[i] = addr32[i / 4] >> ((i % 4) * 8);

	error = ethernet_register_interface
	  (vnm,
	   ixge_device_class.index,
	   xd->device_index,
	   /* ethernet address */ addr8,
	   &xd->vlib_hw_if_index,
	   ixge_flag_change);
	if (error)
	  clib_error_report (error);
      }

      {
	vnet_sw_interface_t * sw = vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index);
	xd->vlib_sw_if_index = sw->sw_if_index;
      }

      ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0);

      xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE;

      ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0);

      /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */
      r->interrupt.queue_mapping[0] =
	((/* valid bit */ (1 << 7) |
	  ixge_rx_queue_to_interrupt (0)) << 0);

      r->interrupt.queue_mapping[0] |=
	((/* valid bit */ (1 << 7) |
	  ixge_tx_queue_to_interrupt (0)) << 8);

      /* No use in getting too many interrupts.
	 Limit them to one every 3/4 ring size at line rate with
	 minimum sized packets.
	 No need for this since kernel/vlib main loop provides adequate
	 interrupt rate limiting. */
      if (0)
	{
	  f64 line_rate_max_pps = 10e9 / (8 * (64 + /* interframe padding */ 20));
	  ixge_throttle_queue_interrupt (r, 0, .75 * xm->n_descriptors[VLIB_RX] / line_rate_max_pps);
	}
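      /* Back of the envelope for the throttle above: a minimum sized
	 frame occupies 64 + 20 = 84 bytes on the wire, so 10 Gbit/s
	 line rate is 10e9 / (8 * 84) ~= 14.88 Mpps.  With a 1024
	 descriptor rx ring, 3/4 of a ring is 768 packets, i.e. at
	 worst one interrupt every ~52 microseconds. */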
      /* Accept all multicast and broadcast packets. Should really add them
	 to the dst_ethernet_address register array. */
      r->filter_control |= (1 << 10) | (1 << 8);

      /* Enable frames up to the size in the mac frame size register
	 (upper 16 bits): 9216 bytes of payload plus 14 byte header. */
      r->xge_mac.control |= 1 << 2;
      r->xge_mac.rx_max_frame_size = (9216 + 14) << 16;

      /* Enable all interrupts. */
      if (! IXGE_ALWAYS_POLL)
	r->interrupt.enable_write_1_to_set = ~0;
    }
}
static uword
ixge_process (vlib_main_t * vm,
	      vlib_node_runtime_t * rt,
	      vlib_frame_t * f)
{
  vnet_main_t * vnm = vnet_get_main();
  ixge_main_t * xm = &ixge_main;
  ixge_device_t * xd;
  uword event_type, * event_data = 0;
  f64 timeout, link_debounce_deadline;

  ixge_device_init (xm);

  /* Clear all counters. */
  vec_foreach (xd, xm->devices)
    {
      ixge_update_counters (xd);
      memset (xd->counters, 0, sizeof (xd->counters));
    }

  timeout = 30.0;
  link_debounce_deadline = 1e70;

  while (1)
    {
      /* 36 bit stat counters could overflow in ~50 secs.
	 We poll every 30 secs to be conservative. */
      vlib_process_wait_for_event_or_clock (vm, timeout);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type) {
      case EVENT_SET_FLAGS:
	/* Give link state 1 ms to settle before reporting it. */
	link_debounce_deadline = vlib_time_now (vm) + 1e-3;
	timeout = 1e-3;
	break;

      case ~0:
	/* No events found: timer expired. */
	if (vlib_time_now (vm) > link_debounce_deadline)
	  {
	    vec_foreach (xd, xm->devices)
	      {
		ixge_regs_t * r = xd->regs;
		u32 v = r->xge_mac.link_status;
		uword is_up = (v & (1 << 30)) != 0;

		vnet_hw_interface_set_flags
		  (vnm, xd->vlib_hw_if_index,
		   is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
	      }
	    link_debounce_deadline = 1e70;
	    timeout = 30.0;
	  }
	break;

      default:
	ASSERT (0);
	break;
      }

      if (event_data)
	_vec_len (event_data) = 0;

      /* Query stats every 30 secs. */
      {
	f64 now = vlib_time_now (vm);
	if (now - xm->time_last_stats_update > 30)
	  {
	    xm->time_last_stats_update = now;
	    vec_foreach (xd, xm->devices)
	      ixge_update_counters (xd);
	  }
      }
    }

  return 0;
}
static vlib_node_registration_t ixge_process_node = {
  .function = ixge_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ixge-process",
};
clib_error_t * ixge_init (vlib_main_t * vm)
{
  ixge_main_t * xm = &ixge_main;
  clib_error_t * error;

  xm->vlib_main = vm;
  memset (&xm->tx_descriptor_template, 0, sizeof (xm->tx_descriptor_template));
  memset (&xm->tx_descriptor_template_mask, 0, sizeof (xm->tx_descriptor_template_mask));
  xm->tx_descriptor_template.status0 =
    (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED
     | IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED
     | IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS);
  xm->tx_descriptor_template_mask.status0 = 0xffff;
  xm->tx_descriptor_template_mask.status1 = 0x00003fff;

  xm->tx_descriptor_template_mask.status0 &=
    ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET
      | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS);
  xm->tx_descriptor_template_mask.status1 &=
    ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE);
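  /* A note on the template/mask pair above (my reading of the scheme):
     the template supplies the status bits every tx descriptor starts
     from, while the mask marks the status bits expected to match the
     template; clearing IS_END_OF_PACKET, REPORT_STATUS and DONE from
     the mask leaves exactly those bits free to vary per packet. */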
  error = vlib_call_init_function (vm, pci_bus_init);

  return error;
}

VLIB_INIT_FUNCTION (ixge_init);
static void
ixge_pci_intr_handler (vlib_pci_device_t * dev)
{
  ixge_main_t * xm = &ixge_main;
  vlib_main_t * vm = xm->vlib_main;

  vlib_node_set_interrupt_pending (vm, ixge_input_node.index);

  /* Let node know which device is interrupting.  This is a bitmap of
     device indices, so it supports at most one bit per uword. */
  {
    vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, ixge_input_node.index);
    rt->runtime_data[0] |= 1 << dev->private_data;
  }
}
static clib_error_t *
ixge_pci_init (vlib_main_t * vm, vlib_pci_device_t * dev)
{
  ixge_main_t * xm = &ixge_main;
  clib_error_t * error;
  void * r;
  ixge_device_t * xd;

  /* Device found: make sure we have dma memory. */
  if (unix_physmem_is_fake (vm))
    return clib_error_return (0, "no physical memory available");

  error = vlib_pci_map_resource (dev, 0, &r);
  if (error)
    return error;

  vec_add2 (xm->devices, xd, 1);

  if (vec_len (xm->devices) == 1)
    {
      ixge_input_node.function = ixge_input_multiarch_select ();
      vlib_register_node (vm, &ixge_input_node);
    }

  xd->pci_device = dev[0];
  xd->device_id = xd->pci_device.config0.header.device_id;
  xd->regs = r;
  xd->device_index = xd - xm->devices;
  xd->pci_function = dev->bus_address.function;
  xd->per_interface_next_index = ~0;

  /* Chip found so enable node. */
  {
    vlib_node_set_state (vm, ixge_input_node.index,
			 (IXGE_ALWAYS_POLL
			  ? VLIB_NODE_STATE_POLLING
			  : VLIB_NODE_STATE_INTERRUPT));

    dev->private_data = xd->device_index;
  }

  if (vec_len (xm->devices) == 1)
    {
      vlib_register_node (vm, &ixge_process_node);
      xm->process_node_index = ixge_process_node.index;
    }

  error = vlib_pci_bus_master_enable (dev);
  if (error)
    return error;

  return vlib_pci_intr_enable (dev);
}
PCI_REGISTER_DEVICE (ixge_pci_device_registration, static) = {
  .init_function = ixge_pci_init,
  .interrupt_handler = ixge_pci_intr_handler,
  .supported_devices = {
#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, },
    foreach_ixge_pci_device_id
#undef _
    { 0 },
  },
};
void ixge_set_next_node (ixge_rx_next_t next, char * name)
{
  vlib_node_registration_t * r = &ixge_input_node;

  switch (next)
    {
    case IXGE_RX_NEXT_IP4_INPUT:
    case IXGE_RX_NEXT_IP6_INPUT:
    case IXGE_RX_NEXT_ETHERNET_INPUT:
      r->next_nodes[next] = name;
      break;

    default:
      clib_warning ("%s: illegal next %d\n", __FUNCTION__, next);
      break;
    }
}
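/* Usage sketch (hypothetical, compiled out): a feature that knows its rx
   traffic is all IPv4 could bypass ethernet-input by repointing the ip4
   next node before ixge_input_node is registered.  The node name
   "my-ip4-node" is illustrative only. */
#if 0
static clib_error_t *
my_ixge_config (vlib_main_t * vm)
{
  /* Must run before ixge_pci_init registers ixge_input_node. */
  ixge_set_next_node (IXGE_RX_NEXT_IP4_INPUT, "my-ip4-node");
  return 0;
}
#endif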