 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * This driver is not intended for production use and it is unsupported.
 * It is provided for educational use only.
 * Please use the supported DPDK driver instead.
#if __x86_64__ || __i386__
#include <vppinfra/vector.h>

#ifndef CLIB_HAVE_VEC128
#warning HACK: ixge driver won't really work, missing u32x4
typedef unsigned long long u32x4;
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vnet/vnet.h>
#include <ixge/ixge.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>

#define IXGE_ALWAYS_POLL 0

#define EVENT_SET_FLAGS 0
#define IXGE_HWBP_RACE_ELOG 0

#define PCI_VENDOR_ID_INTEL 0x8086
/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */
#define XGE_PHY_DEV_TYPE_PMA_PMD 1
#define XGE_PHY_DEV_TYPE_PHY_XS 4
#define XGE_PHY_ID1 0x2
#define XGE_PHY_ID2 0x3
#define XGE_PHY_CONTROL 0x0
#define XGE_PHY_CONTROL_RESET (1 << 15)
ixge_main_t ixge_main;
static vlib_node_registration_t ixge_input_node;
static vlib_node_registration_t ixge_process_node;
ixge_semaphore_get (ixge_device_t * xd)
  ixge_main_t *xm = &ixge_main;
  vlib_main_t *vm = xm->vlib_main;
  ixge_regs_t *r = xd->regs;
  while (!(r->software_semaphore & (1 << 0)))
    vlib_process_suspend (vm, 100e-6);
  r->software_semaphore |= 1 << 1;
  while (!(r->software_semaphore & (1 << 1)));
ixge_semaphore_release (ixge_device_t * xd)
  ixge_regs_t *r = xd->regs;
  r->software_semaphore &= ~3;
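/* Illustrative usage sketch, not part of the driver: the two functions
   above bracket any access to state shared with firmware.  A caller pairs
   them around the critical section:

     ixge_semaphore_get (xd);
     ... touch registers shared with firmware ...
     ixge_semaphore_release (xd);

   ixge_software_firmware_sync() below is exactly this pattern plus the
   software/firmware ownership bits. */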
ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask)
  ixge_main_t *xm = &ixge_main;
  vlib_main_t *vm = xm->vlib_main;
  ixge_regs_t *r = xd->regs;
  u32 fw_mask = sw_mask << 5;
  ixge_semaphore_get (xd);
  m = r->software_firmware_sync;
  done = (m & fw_mask) == 0;
  r->software_firmware_sync = m | sw_mask;
  ixge_semaphore_release (xd);
  vlib_process_suspend (vm, 10e-3);
ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask)
  ixge_regs_t *r = xd->regs;
  ixge_semaphore_get (xd);
  r->software_firmware_sync &= ~sw_mask;
  ixge_semaphore_release (xd);
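#if 0
/* Illustrative sketch, not driver code: how the software/firmware sync
   masks are used.  The bit positions are inferred from the call sites in
   this file (PHY i claims software bit 1 + i; the firmware mirror bit is
   5 positions up, as computed in ixge_software_firmware_sync() above). */
static void
ixge_example_sync_phy_access (ixge_device_t * xd)
{
  u32 sw_mask = 1 << (1 + xd->phy_index);
  ixge_software_firmware_sync (xd, sw_mask);
  /* ... exclusive access to the PHY ... */
  ixge_software_firmware_sync_release (xd, sw_mask);
}
#endif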
ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index,
  ixge_regs_t *r = xd->regs;
  const u32 busy_bit = 1 << 30;
  ASSERT (xd->phy_index < 2);
  ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index));
  ASSERT (reg_index < (1 << 16));
  ASSERT (dev_type < (1 << 5));
  r->xge_mac.phy_data = v;
    reg_index | (dev_type << 16) | (xd->phys[xd->phy_index].mdio_address << 21);
  r->xge_mac.phy_command = x | busy_bit;
  /* Busy wait timed to take 28e-6 secs. No suspend. */
  while (r->xge_mac.phy_command & busy_bit)
  r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit;
  while (r->xge_mac.phy_command & busy_bit)
  v = r->xge_mac.phy_data >> 16;
  ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index));
ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index)
  return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */

ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v)
  (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */
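#if 0
/* Illustrative sketch, not driver code: combining the two clause-45 ID
   registers into a 32-bit PHY id, mirroring what ixge_phy_init() does
   further down in this file. */
static u32
ixge_example_read_phy_id (ixge_device_t * xd)
{
  u32 hi = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1);
  u32 lo = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2);
  return (hi << 16) | lo;
}
#endif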
ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda)
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data);
  v |= (sda != 0) << 3;
  v |= (scl != 0) << 1;
  xd->regs->i2c_control = v;

ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda)
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data);
  v = xd->regs->i2c_control;
  *sda = (v & (1 << 2)) != 0;
  *scl = (v & (1 << 0)) != 0;
ixge_read_eeprom (ixge_device_t * xd, u32 address)
  ixge_regs_t *r = xd->regs;
  r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2));
  /* Wait for done bit. */
  while (!((v = r->eeprom_read) & (1 << 1)))
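#if 0
/* Illustrative sketch, not driver code: EEPROM words are addressed by
   word index, so the SFP+ init-sequence pointer fetched below in
   ixge_sfp_phy_init_from_eeprom() is simply: */
u16 init_seq_ptr = ixge_read_eeprom (xd, 0x2b);
#endif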
ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable)
  u32 tx_disable_bit = 1 << 3;
    xd->regs->sdp_control &= ~tx_disable_bit;
    xd->regs->sdp_control |= tx_disable_bit;

ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable)
  u32 is_10g_bit = 1 << 5;
    xd->regs->sdp_control |= is_10g_bit;
    xd->regs->sdp_control &= ~is_10g_bit;
static clib_error_t *
ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type)
  u16 a, id, reg_values_addr = 0;
  a = ixge_read_eeprom (xd, 0x2b);
  if (a == 0 || a == 0xffff)
    return clib_error_create ("no init sequence in eeprom");
      id = ixge_read_eeprom (xd, ++a);
	reg_values_addr = ixge_read_eeprom (xd, ++a);
    return clib_error_create ("failed to find id 0x%x", sfp_type);
  ixge_software_firmware_sync (xd, 1 << 3);
      u16 v = ixge_read_eeprom (xd, ++reg_values_addr);
      xd->regs->core_analog_config = v;
  ixge_software_firmware_sync_release (xd, 1 << 3);
  /* Make sure laser is off. We'll turn on the laser when
     the interface is brought up. */
  ixge_sfp_enable_disable_laser (xd, /* enable */ 0);
  ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1);
ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up)
      /* pma/pmd 10g serial SFI. */
      xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16);
      xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16;
      v = xd->regs->xge_mac.auto_negotiation_control;
      /* Restart autoneg. */
      xd->regs->xge_mac.auto_negotiation_control = v;
      while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000))
      v = xd->regs->xge_mac.auto_negotiation_control;
      /* link mode 10g sfi serdes */
      /* Restart autoneg. */
      xd->regs->xge_mac.auto_negotiation_control = v;
      xd->regs->xge_mac.link_status;
  ixge_sfp_enable_disable_laser (xd, /* enable */ is_up);
  /* Give time for link partner to notice that we're up. */
  if (is_up && vlib_in_process_context (vlib_get_main ()))
    vlib_process_suspend (vlib_get_main (), 300e-3);
always_inline ixge_dma_regs_t *
get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi)
  ixge_regs_t *r = xd->regs;
    return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64];
    return &r->tx_dma[qi];
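#if 0
/* Illustrative sketch, not driver code: the 82599 splits its RX queue
   registers into two banks of 64, which get_dma_regs() hides: */
ixge_dma_regs_t *rx0 = get_dma_regs (xd, VLIB_RX, 0);	/* rx_dma0[0] */
ixge_dma_regs_t *rx65 = get_dma_regs (xd, VLIB_RX, 65);	/* rx_dma1[1] */
ixge_dma_regs_t *tx0 = get_dma_regs (xd, VLIB_TX, 0);	/* tx_dma[0] */
#endif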
static clib_error_t *
ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance);
  ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0);
      xd->regs->rx_enable |= 1;
      xd->regs->tx_dma_control |= 1;
      dr->control |= 1 << 25;
      while (!(dr->control & (1 << 25)))
      xd->regs->rx_enable &= ~1;
      xd->regs->tx_dma_control &= ~1;
  ixge_sfp_device_up_down (xd, is_up);
  return /* no error */ 0;
ixge_sfp_phy_init (ixge_device_t * xd)
  ixge_phy_t *phy = xd->phys + xd->phy_index;
  i2c_bus_t *ib = &xd->i2c_bus;
  ib->private_data = xd->device_index;
  ib->put_bits = ixge_i2c_put_bits;
  ib->get_bits = ixge_i2c_get_bits;
  vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom);
  if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom))
    xd->sfp_eeprom.id = SFP_ID_unknown;
      /* FIXME 5 => SR/LR eeprom ID. */
	ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function);
	clib_error_report (e);
  phy->mdio_address = ~0;
ixge_phy_init (ixge_device_t * xd)
  ixge_main_t *xm = &ixge_main;
  vlib_main_t *vm = xm->vlib_main;
  ixge_phy_t *phy = xd->phys + xd->phy_index;
  switch (xd->device_id)
    case IXGE_82599_sfp_em:
    case IXGE_82599_sfp_fcoe:
      return ixge_sfp_phy_init (xd);
  /* Probe address of phy. */
    phy->mdio_address = ~0;
    for (i = 0; i < 32; i++)
	phy->mdio_address = i;
	v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1);
	if (v != 0xffff && v != 0)
    ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) |
     ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2));
    ELOG_TYPE_DECLARE (e) =
      .function = (char *) __FUNCTION__,
      .format = "ixge %d, phy id 0x%x mdio address %d",
      .format_args = "i4i4i4",
    };
    u32 instance, id, address;
    ed = ELOG_DATA (&vm->elog_main, e);
    ed->instance = xd->device_index;
    ed->address = phy->mdio_address;
  ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL,
		      XGE_PHY_CONTROL_RESET);
  /* Wait for self-clearing reset bit to clear. */
      vlib_process_suspend (vm, 1e-3);
  while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) &
	 XGE_PHY_CONTROL_RESET);
format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va)
  ixge_rx_from_hw_descriptor_t *d =
    va_arg (*va, ixge_rx_from_hw_descriptor_t *);
  u32 s0 = d->status[0], s2 = d->status[2];
  u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp;
  u32 indent = format_get_indent (s);
  s = format (s, "%s-owned",
	      (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" :
    format (s, ", length this descriptor %d, l3 offset %d",
	    d->n_packet_bytes_this_descriptor,
	    IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0));
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET)
    s = format (s, ", end-of-packet");
  s = format (s, "\n%U", format_white_space, indent);
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR)
    s = format (s, "layer2 error");
  if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2)
      s = format (s, "layer 2 type %d", (s0 & 0x1f));
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN)
    s = format (s, "vlan header 0x%x\n%U", d->vlan_tag,
		format_white_space, indent);
  if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4)))
      s = format (s, "ip4%s",
		  (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" :
      if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED)
	s = format (s, " checksum %s",
		    (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ?
  if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6)))
    s = format (s, "ip6%s",
		(s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" :
  if ((is_ip = (is_ip4 | is_ip6)))
      is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0;
      is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0;
	s = format (s, ", tcp");
	s = format (s, ", udp");
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED)
    s = format (s, ", tcp checksum %s",
		(s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" :
  if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)
    s = format (s, ", udp checksum %s",
		(s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" :
format_ixge_tx_descriptor (u8 * s, va_list * va)
  ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *);
  u32 s0 = d->status0, s1 = d->status1;
  u32 indent = format_get_indent (s);
  s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer",
	      d->buffer_address, s1 >> 14, d->n_bytes_this_buffer);
  s = format (s, "\n%U", format_white_space, indent);
  if ((v = (s0 >> 0) & 3))
    s = format (s, "reserved 0x%x, ", v);
  if ((v = (s0 >> 2) & 3))
    s = format (s, "mac 0x%x, ", v);
  if ((v = (s0 >> 4) & 0xf) != 3)
    s = format (s, "type 0x%x, ", v);
  s = format (s, "%s%s%s%s%s%s%s%s",
	      (s0 & (1 << 8)) ? "eop, " : "",
	      (s0 & (1 << 9)) ? "insert-fcs, " : "",
	      (s0 & (1 << 10)) ? "reserved26, " : "",
	      (s0 & (1 << 11)) ? "report-status, " : "",
	      (s0 & (1 << 12)) ? "reserved28, " : "",
	      (s0 & (1 << 13)) ? "is-advanced, " : "",
	      (s0 & (1 << 14)) ? "vlan-enable, " : "",
	      (s0 & (1 << 15)) ? "tx-segmentation, " : "");
  if ((v = s1 & 0xf) != 0)
    s = format (s, "status 0x%x, ", v);
  if ((v = (s1 >> 4) & 0xf))
    s = format (s, "context 0x%x, ", v);
  if ((v = (s1 >> 8) & 0x3f))
    s = format (s, "options 0x%x, ", v);
  ixge_descriptor_t before, after;
  u8 is_start_of_packet;
  /* Copy of VLIB buffer; packet data stored in pre_data. */
  vlib_buffer_t buffer;
} ixge_rx_dma_trace_t;
format_ixge_rx_dma_trace (u8 * s, va_list * va)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  vlib_node_t *node = va_arg (*va, vlib_node_t *);
  vnet_main_t *vnm = vnet_get_main ();
  ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *);
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index);
  format_function_t *f;
  u32 indent = format_get_indent (s);
    vnet_sw_interface_t *sw =
      vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
      format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw,
  s = format (s, "\n%Ubefore: %U",
	      format_white_space, indent,
	      format_ixge_rx_from_hw_descriptor, &t->before);
  s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx",
	      format_white_space, indent,
	      t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address);
  s = format (s, "\n%Ubuffer 0x%x: %U",
	      format_white_space, indent,
	      t->buffer_index, format_vnet_buffer, &t->buffer);
  s = format (s, "\n%U", format_white_space, indent);
  f = node->format_buffer;
  if (!f || !t->is_start_of_packet)
    f = format_hex_bytes;
  s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
#define foreach_ixge_error \
  _ (none, "no error") \
  _ (tx_full_drops, "tx ring full drops") \
  _ (ip4_checksum_error, "ip4 checksum errors") \
  _ (rx_alloc_fail, "rx buf alloc from free list failed") \
  _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem")

#define _(f,s) IXGE_ERROR_##f,
ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd,
				       u8 * next0, u8 * error0, u32 * flags0)
  u8 is0_ip4, is0_ip6, n0, e0;
  e0 = IXGE_ERROR_none;
  n0 = IXGE_RX_NEXT_ETHERNET_INPUT;
  is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
  n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;
  e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
	? IXGE_ERROR_ip4_checksum_error : e0);
  is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
  n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;
  n0 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n0;
  /* Check for error. */
  n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;
  f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
		| IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
	? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
  f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
		 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
	 ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd,
				       u8 * next0, u8 * error0, u32 * flags0,
				       u8 * next1, u8 * error1, u32 * flags1)
  u8 is0_ip4, is0_ip6, n0, e0;
  u8 is1_ip4, is1_ip6, n1, e1;
  e0 = e1 = IXGE_ERROR_none;
  n0 = n1 = IXGE_RX_NEXT_ETHERNET_INPUT;
  is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
  is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED;
  n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0;
  n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1;
  e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
	? IXGE_ERROR_ip4_checksum_error : e0);
  e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR)
	? IXGE_ERROR_ip4_checksum_error : e1);
  is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
  is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6;
  n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0;
  n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1;
  n0 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n0;
  n1 = (xd->per_interface_next_index != ~0) ?
    xd->per_interface_next_index : n1;
  /* Check for error. */
  n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0;
  n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1;
  f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
		| IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
	? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
  f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED
		| IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED))
	? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0);
  f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
		 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
	 ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
  f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR
		 | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR))
	 ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
ixge_rx_trace (ixge_main_t * xm,
	       ixge_dma_queue_t * dq,
	       ixge_descriptor_t * before_descriptors,
	       u32 * before_buffers,
	       ixge_descriptor_t * after_descriptors, uword n_descriptors)
  vlib_main_t *vm = xm->vlib_main;
  vlib_node_runtime_t *node = dq->rx.node;
  ixge_rx_from_hw_descriptor_t *bd;
  ixge_rx_to_hw_descriptor_t *ad;
  u32 *b, n_left, is_sop, next_index_sop;
  n_left = n_descriptors;
  bd = &before_descriptors->rx_from_hw;
  ad = &after_descriptors->rx_to_hw;
  is_sop = dq->rx.is_start_of_packet;
  next_index_sop = dq->rx.saved_start_of_packet_next_index;
      u32 bi0, bi1, flags0, flags1;
      vlib_buffer_t *b0, *b1;
      ixge_rx_dma_trace_t *t0, *t1;
      u8 next0, error0, next1, error1;
      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);
      ixge_rx_next_and_error_from_status_x2 (xd,
					     bd[0].status[0], bd[0].status[2],
					     bd[1].status[0], bd[1].status[2],
					     &next0, &error0, &flags0,
					     &next1, &error1, &flags1);
      next_index_sop = is_sop ? next0 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      next_index_sop = is_sop ? next1 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0);
      t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
      t1->is_start_of_packet = is_sop;
      is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t0->queue_index = dq->queue_index;
      t1->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t1->device_index = xd->device_index;
      t0->before.rx_from_hw = bd[0];
      t1->before.rx_from_hw = bd[1];
      t0->after.rx_to_hw = ad[0];
      t1->after.rx_to_hw = ad[1];
      t0->buffer_index = bi0;
      t1->buffer_index = bi1;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
	      sizeof (t0->buffer.pre_data));
      memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
	      sizeof (t1->buffer.pre_data));
      ixge_rx_dma_trace_t *t0;
      b0 = vlib_get_buffer (vm, bi0);
      ixge_rx_next_and_error_from_status_x1 (xd,
					     bd[0].status[0], bd[0].status[2],
					     &next0, &error0, &flags0);
      next_index_sop = is_sop ? next0 : next_index_sop;
      vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t0->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t0->before.rx_from_hw = bd[0];
      t0->after.rx_to_hw = ad[0];
      t0->buffer_index = bi0;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
	      sizeof (t0->buffer.pre_data));
  ixge_tx_descriptor_t descriptor;
  u8 is_start_of_packet;
  /* Copy of VLIB buffer; packet data stored in pre_data. */
  vlib_buffer_t buffer;
} ixge_tx_dma_trace_t;
format_ixge_tx_dma_trace (u8 * s, va_list * va)
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *);
  vnet_main_t *vnm = vnet_get_main ();
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index);
  format_function_t *f;
  u32 indent = format_get_indent (s);
    vnet_sw_interface_t *sw =
      vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
      format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw,
  s = format (s, "\n%Udescriptor: %U",
	      format_white_space, indent,
	      format_ixge_tx_descriptor, &t->descriptor);
  s = format (s, "\n%Ubuffer 0x%x: %U",
	      format_white_space, indent,
	      t->buffer_index, format_vnet_buffer, &t->buffer);
  s = format (s, "\n%U", format_white_space, indent);
  f = format_ethernet_header_with_length;
  if (!f || !t->is_start_of_packet)
    f = format_hex_bytes;
  s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
  vlib_node_runtime_t *node;
  u32 is_start_of_packet;
  u32 n_bytes_in_packet;
  ixge_tx_descriptor_t *start_of_packet_descriptor;
ixge_tx_trace (ixge_main_t * xm,
	       ixge_dma_queue_t * dq,
	       ixge_tx_state_t * tx_state,
	       ixge_tx_descriptor_t * descriptors,
	       u32 * buffers, uword n_descriptors)
  vlib_main_t *vm = xm->vlib_main;
  vlib_node_runtime_t *node = tx_state->node;
  ixge_tx_descriptor_t *d;
  u32 *b, n_left, is_sop;
  n_left = n_descriptors;
  is_sop = tx_state->is_start_of_packet;
      vlib_buffer_t *b0, *b1;
      ixge_tx_dma_trace_t *t0, *t1;
      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
      t1->is_start_of_packet = is_sop;
      is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t0->queue_index = dq->queue_index;
      t1->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t1->device_index = xd->device_index;
      t0->descriptor = d[0];
      t1->descriptor = d[1];
      t0->buffer_index = bi0;
      t1->buffer_index = bi1;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
	      sizeof (t0->buffer.pre_data));
      memcpy (t1->buffer.pre_data, b1->data + b1->current_data,
	      sizeof (t1->buffer.pre_data));
      ixge_tx_dma_trace_t *t0;
      b0 = vlib_get_buffer (vm, bi0);
      t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
      t0->is_start_of_packet = is_sop;
      is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      t0->queue_index = dq->queue_index;
      t0->device_index = xd->device_index;
      t0->descriptor = d[0];
      t0->buffer_index = bi0;
      memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
      memcpy (t0->buffer.pre_data, b0->data + b0->current_data,
	      sizeof (t0->buffer.pre_data));
ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1)
  ASSERT (i0 < q->n_descriptors);
  ASSERT (i1 < q->n_descriptors);
  return d < 0 ? q->n_descriptors + d : d;

ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1)
  ASSERT (i0 < q->n_descriptors);
  ASSERT (i1 < q->n_descriptors);
  d -= d >= q->n_descriptors ? q->n_descriptors : 0;
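#if 0
/* Worked example, assuming q->n_descriptors == 256: both helpers compute
   ring arithmetic modulo the descriptor count, so */
ASSERT (ixge_ring_add (q, 250, 10) == 4);	/* 260 mod 256 */
ASSERT (ixge_ring_sub (q, 250, 4) == 10);	/* (4 - 250) mod 256 */
#endif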
ixge_tx_descriptor_matches_template (ixge_main_t * xm,
				     ixge_tx_descriptor_t * d)
  cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0)
	 ^ xm->tx_descriptor_template.status0);
  cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1)
	 ^ xm->tx_descriptor_template.status1);
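#if 0
/* Illustrative sketch, not driver code: the template/mask pair lets the
   TX path assert that a descriptor about to be reused still carries the
   expected static bits; only fields outside the mask may differ.  This is
   how ixge_tx_no_wrap() below uses it: */
ASSERT (ixge_tx_descriptor_matches_template (xm, d));
#endif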
ixge_tx_no_wrap (ixge_main_t * xm,
		 ixge_dma_queue_t * dq,
		 u32 start_descriptor_index,
		 u32 n_descriptors, ixge_tx_state_t * tx_state)
  vlib_main_t *vm = xm->vlib_main;
  ixge_tx_descriptor_t *d, *d_sop;
  u32 n_left = n_descriptors;
  u32 *to_free = vec_end (xm->tx_buffers_pending_free);
    vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
  u32 is_sop = tx_state->is_start_of_packet;
  u32 len_sop = tx_state->n_bytes_in_packet;
  u16 template_status = xm->tx_descriptor_template.status0;
  u32 descriptor_prefetch_rotor = 0;
  ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
  d = &dq->descriptors[start_descriptor_index].tx;
  d_sop = is_sop ? d : tx_state->start_of_packet_descriptor;
      vlib_buffer_t *b0, *b1;
      u8 is_eop0, is_eop1;
      /* Prefetch next iteration. */
      vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD);
      vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD);
      if ((descriptor_prefetch_rotor & 0x3) == 0)
	CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE);
      descriptor_prefetch_rotor += 2;
      to_free[0] = fi0 = to_tx[0];
      to_free += fi0 != 0;
      to_free[0] = fi1 = to_tx[1];
      to_free += fi1 != 0;
      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);
      is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      len0 = b0->current_length;
      len1 = b1->current_length;
      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));
      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1));
      d[0].buffer_address =
	vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;
      d[1].buffer_address =
	vlib_get_buffer_data_physical_address (vm, bi1) + b1->current_data;
      d[0].n_bytes_this_buffer = len0;
      d[1].n_bytes_this_buffer = len1;
	template_status | (is_eop0 <<
			   IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
	template_status | (is_eop1 <<
			   IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
      len_sop = (is_sop ? 0 : len_sop) + len0;
	IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d_sop = is_eop0 ? d : d_sop;
      len_sop = (is_sop ? 0 : len_sop) + len1;
	IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d_sop = is_eop1 ? d : d_sop;
      to_free[0] = fi0 = to_tx[0];
      to_free += fi0 != 0;
      b0 = vlib_get_buffer (vm, bi0);
      is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0;
      len0 = b0->current_length;
      ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0));
      d[0].buffer_address =
	vlib_get_buffer_data_physical_address (vm, bi0) + b0->current_data;
      d[0].n_bytes_this_buffer = len0;
	template_status | (is_eop0 <<
			   IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET);
      len_sop = (is_sop ? 0 : len_sop) + len0;
	IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop);
      d_sop = is_eop0 ? d : d_sop;
  if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE)
	vec_elt_at_index (dq->descriptor_buffer_indices,
			  start_descriptor_index);
      ixge_tx_trace (xm, xd, dq, tx_state,
		     &dq->descriptors[start_descriptor_index].tx, to_tx,
  _vec_len (xm->tx_buffers_pending_free) =
    to_free - xm->tx_buffers_pending_free;
  /* When we are done d_sop can point to end of ring. Wrap it if so. */
    ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx;
    ASSERT (d_sop - d_start <= dq->n_descriptors);
    d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop;
  tx_state->is_start_of_packet = is_sop;
  tx_state->start_of_packet_descriptor = d_sop;
  tx_state->n_bytes_in_packet = len_sop;
  return n_descriptors;
ixge_interface_tx (vlib_main_t * vm,
		   vlib_node_runtime_t * node, vlib_frame_t * f)
  ixge_main_t *xm = &ixge_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance);
  ixge_dma_queue_t *dq;
  u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop;
  u32 queue_index = 0;		/* fixme parameter */
  ixge_tx_state_t tx_state;
  tx_state.node = node;
  tx_state.is_start_of_packet = 1;
  tx_state.start_of_packet_descriptor = 0;
  tx_state.n_bytes_in_packet = 0;
  from = vlib_frame_vector_args (f);
  dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);
  dq->head_index = dq->tx.head_index_write_back[0];
  /* Since head == tail means ring is empty we can send up to
     dq->n_descriptors - 1. */
  n_left_tx = dq->n_descriptors - 1;
  n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index);
  _vec_len (xm->tx_buffers_pending_free) = 0;
  n_descriptors_to_tx = f->n_vectors;
  if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx))
      i32 i, n_ok, i_eop, i_sop;
      for (i = n_left_tx - 1; i >= 0; i--)
	  vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
	  if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
	  if (i_sop != ~0 && i_eop != ~0)
	ELOG_TYPE_DECLARE (e) =
	  .function = (char *) __FUNCTION__,
	  .format = "ixge %d, ring full to tx %d head %d tail %d",
	  .format_args =
	u16 instance, to_tx, head, tail;
	ed = ELOG_DATA (&vm->elog_main, e);
	ed->instance = xd->device_index;
	ed->to_tx = n_descriptors_to_tx;
	ed->head = dq->head_index;
	ed->tail = dq->tail_index;
      if (n_ok < n_descriptors_to_tx)
	  n_tail_drop = n_descriptors_to_tx - n_ok;
	  vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop);
	  vlib_error_count (vm, ixge_input_node.index,
			    IXGE_ERROR_tx_full_drops, n_tail_drop);
      n_descriptors_to_tx = n_ok;
  dq->tx.n_buffers_on_ring += n_descriptors_to_tx;
  /* Process from tail to end of descriptor ring. */
  if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors)
	clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx);
      n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state);
      n_descriptors_to_tx -= n;
      dq->tail_index += n;
      ASSERT (dq->tail_index <= dq->n_descriptors);
      if (dq->tail_index == dq->n_descriptors)
  if (n_descriptors_to_tx > 0)
	ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state);
      ASSERT (n == n_descriptors_to_tx);
      dq->tail_index += n;
      ASSERT (dq->tail_index <= dq->n_descriptors);
      if (dq->tail_index == dq->n_descriptors)
  /* We should only get full packets. */
  ASSERT (tx_state.is_start_of_packet);
  /* Report status when last descriptor is done. */
    u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1;
    ixge_tx_descriptor_t *d = &dq->descriptors[i].tx;
    d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS;
  /* Give new descriptors to hardware. */
    ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index);
    CLIB_MEMORY_BARRIER ();
    dr->tail_index = dq->tail_index;
  /* Free any buffers that are done. */
    u32 n = _vec_len (xm->tx_buffers_pending_free);
      vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n);
      _vec_len (xm->tx_buffers_pending_free) = 0;
      ASSERT (dq->tx.n_buffers_on_ring >= n);
      dq->tx.n_buffers_on_ring -= (n - n_tail_drop);
  return f->n_vectors;
ixge_rx_queue_no_wrap (ixge_main_t * xm,
		       ixge_dma_queue_t * dq,
		       u32 start_descriptor_index, u32 n_descriptors)
  vlib_main_t *vm = xm->vlib_main;
  vlib_node_runtime_t *node = dq->rx.node;
  ixge_descriptor_t *d;
  static ixge_descriptor_t *d_trace_save;
  static u32 *d_trace_buffers;
  u32 n_descriptors_left = n_descriptors;
    vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index);
  u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index;
  u32 bi_last = dq->rx.saved_last_buffer_index;
  u32 next_index_sop = dq->rx.saved_start_of_packet_next_index;
  u32 is_sop = dq->rx.is_start_of_packet;
  u32 next_index, n_left_to_next, *to_next;
  u32 n_trace = vlib_get_trace_count (vm, node);
  vlib_buffer_t *b_last, b_dummy;
  ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors);
  d = &dq->descriptors[start_descriptor_index];
  b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy;
  next_index = dq->rx.next_index;
      u32 n = clib_min (n_trace, n_descriptors);
      _vec_len (d_trace_save) = 0;
      _vec_len (d_trace_buffers) = 0;
      vec_add (d_trace_save, (ixge_descriptor_t *) d, n);
      vec_add (d_trace_buffers, to_rx, n);
    uword l = vec_len (xm->rx_buffers_to_add);
    if (l < n_descriptors_left)
	u32 n_to_alloc = 2 * dq->n_descriptors - l;
	vec_resize (xm->rx_buffers_to_add, n_to_alloc);
	_vec_len (xm->rx_buffers_to_add) = l;
	  vlib_buffer_alloc (vm, xm->rx_buffers_to_add + l, n_to_alloc);
	_vec_len (xm->rx_buffers_to_add) += n_allocated;
	/* Handle transient allocation failure. */
	if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left))
	    if (n_allocated == 0)
	      vlib_error_count (vm, ixge_input_node.index,
				IXGE_ERROR_rx_alloc_no_physmem, 1);
	      vlib_error_count (vm, ixge_input_node.index,
				IXGE_ERROR_rx_alloc_fail, 1);
	    n_descriptors_left = l + n_allocated;
	n_descriptors = n_descriptors_left;
  /* Add buffers from end of vector going backwards. */
  to_add = vec_end (xm->rx_buffers_to_add) - 1;
  while (n_descriptors_left > 0)
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      while (n_descriptors_left >= 4 && n_left_to_next >= 2)
	  vlib_buffer_t *b0, *b1;
	  u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
	  u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1;
	  u8 is_eop0, error0, next0;
	  u8 is_eop1, error1, next1;
	  ixge_descriptor_t d0, d1;
	  vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE);
	  vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE);
	  CLIB_PREFETCH (d + 2, 32, STORE);
	  d0.as_u32x4 = d[0].as_u32x4;
	  d1.as_u32x4 = d[1].as_u32x4;
	  s20 = d0.rx_from_hw.status[2];
	  s21 = d1.rx_from_hw.status[2];
	  s00 = d0.rx_from_hw.status[0];
	  s01 = d1.rx_from_hw.status[0];
	      ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
	    goto found_hw_owned_descriptor_x2;
	  ASSERT (to_add - 1 >= xm->rx_buffers_to_add);
	  ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
		  vlib_buffer_is_known (vm, bi0));
	  ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
		  vlib_buffer_is_known (vm, bi1));
	  ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
		  vlib_buffer_is_known (vm, fi0));
	  ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
		  vlib_buffer_is_known (vm, fi1));
	  b0 = vlib_get_buffer (vm, bi0);
	  b1 = vlib_get_buffer (vm, bi1);
	  /*
	   * Turn this on if you run into
	   * "bad monkey" contexts, and you want to know exactly
	   * which nodes they've visited... See main.c...
	   */
	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
	  CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD);
	  CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD);
	  is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
	  is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
	  ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21,
						 &next0, &error0, &flags0,
						 &next1, &error1, &flags1);
	  next0 = is_sop ? next0 : next_index_sop;
	  next1 = is_eop0 ? next1 : next0;
	  next_index_sop = next1;
	  b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
	  b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
	  vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
	  vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
	  b0->error = node->errors[error0];
	  b1->error = node->errors[error1];
	  len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
	  len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor;
	  n_bytes += len0 + len1;
	  n_packets += is_eop0 + is_eop1;
	  /* Give new buffers to hardware. */
	  d0.rx_to_hw.tail_address =
	    vlib_get_buffer_data_physical_address (vm, fi0);
	  d1.rx_to_hw.tail_address =
	    vlib_get_buffer_data_physical_address (vm, fi1);
	  d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address;
	  d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address;
	  d[0].as_u32x4 = d0.as_u32x4;
	  d[1].as_u32x4 = d1.as_u32x4;
	  n_descriptors_left -= 2;
	  /* Point to either l2 or l3 header depending on next. */
	  l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
	    ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0;
	  l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT))
	    ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0;
	  b0->current_length = len0 - l3_offset0;
	  b1->current_length = len1 - l3_offset1;
	  b0->current_data = l3_offset0;
	  b1->current_data = l3_offset1;
	  b_last->next_buffer = is_sop ? ~0 : bi0;
	  b0->next_buffer = is_eop0 ? ~0 : bi1;
	    u32 bi_sop0 = is_sop ? bi0 : bi_sop;
	    u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;
	      u8 *msg = vlib_validate_buffer (vm, bi_sop0,
					      /* follow_buffer_next */ 1);
	      u8 *msg = vlib_validate_buffer (vm, bi_sop1,
					      /* follow_buffer_next */ 1);
	  if (0)		/* "Dave" version */
	      u32 bi_sop0 = is_sop ? bi0 : bi_sop;
	      u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0;
		  to_next[0] = bi_sop0;
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next,
		  to_next[0] = bi_sop1;
		  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						   to_next, n_left_to_next,
	  if (1)		/* "Eliot" version */
	      /* Speculatively enqueue to cached next. */
	      u8 saved_is_sop = is_sop;
	      u32 bi_sop_save = bi_sop;
	      bi_sop = saved_is_sop ? bi0 : bi_sop;
	      to_next[0] = bi_sop;
	      n_left_to_next -= is_eop0;
	      bi_sop = is_eop0 ? bi1 : bi_sop;
	      to_next[0] = bi_sop;
	      n_left_to_next -= is_eop1;
		  (!(next0 == next_index && next1 == next_index)))
		  /* Undo speculation. */
		  to_next -= is_eop0 + is_eop1;
		  n_left_to_next += is_eop0 + is_eop1;
		  /* Re-do both descriptors being careful about where we enqueue. */
		  bi_sop = saved_is_sop ? bi0 : bi_sop_save;
		  if (next0 != next_index)
		    vlib_set_next_frame_buffer (vm, node, next0, bi_sop);
		      to_next[0] = bi_sop;
		      n_left_to_next -= 1;
		  bi_sop = is_eop0 ? bi1 : bi_sop;
		  if (next1 != next_index)
		    vlib_set_next_frame_buffer (vm, node, next1, bi_sop);
		      to_next[0] = bi_sop;
		      n_left_to_next -= 1;
		  /* Switch cached next index when next for both packets is the same. */
		  if (is_eop0 && is_eop1 && next0 == next1)
		      vlib_put_next_frame (vm, node, next_index,
		      vlib_get_next_frame (vm, node, next_index,
					   to_next, n_left_to_next);
  /* Bail out of dual loop and proceed with single loop. */
found_hw_owned_descriptor_x2:
  while (n_descriptors_left > 0 && n_left_to_next > 0)
      u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0;
      u8 is_eop0, error0, next0;
      ixge_descriptor_t d0;
      d0.as_u32x4 = d[0].as_u32x4;
      s20 = d0.rx_from_hw.status[2];
      s00 = d0.rx_from_hw.status[0];
      if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE))
	goto found_hw_owned_descriptor_x1;
      ASSERT (to_add >= xm->rx_buffers_to_add);
      ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
	      vlib_buffer_is_known (vm, bi0));
      ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED ==
	      vlib_buffer_is_known (vm, fi0));
      b0 = vlib_get_buffer (vm, bi0);
      /*
       * Turn this on if you run into
       * "bad monkey" contexts, and you want to know exactly
       * which nodes they've visited...
       */
      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
      is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0;
      ixge_rx_next_and_error_from_status_x1
	(xd, s00, s20, &next0, &error0, &flags0);
      next0 = is_sop ? next0 : next_index_sop;
      next_index_sop = next0;
      b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT);
      vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
      vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
      b0->error = node->errors[error0];
      len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor;
      n_packets += is_eop0;
      /* Give new buffer to hardware. */
      d0.rx_to_hw.tail_address =
	vlib_get_buffer_data_physical_address (vm, fi0);
      d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address;
      d[0].as_u32x4 = d0.as_u32x4;
      n_descriptors_left -= 1;
      /* Point to either l2 or l3 header depending on next. */
      l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT))
	? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0;
      b0->current_length = len0 - l3_offset0;
      b0->current_data = l3_offset0;
      b_last->next_buffer = is_sop ? ~0 : bi0;
      bi_sop = is_sop ? bi0 : bi_sop;
      if (CLIB_DEBUG > 0 && is_eop0)
	    vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1);
      if (0)			/* "Dave" version */
	      to_next[0] = bi_sop;
	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					       to_next, n_left_to_next,
      if (1)			/* "Eliot" version */
	  if (PREDICT_TRUE (next0 == next_index))
	      to_next[0] = bi_sop;
	      n_left_to_next -= is_eop0;
	      if (next0 != next_index && is_eop0)
		vlib_set_next_frame_buffer (vm, node, next0, bi_sop);
	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
	      vlib_get_next_frame (vm, node, next_index,
				   to_next, n_left_to_next);
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
found_hw_owned_descriptor_x1:
  if (n_descriptors_left > 0)
    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add;
    u32 n_done = n_descriptors - n_descriptors_left;
    if (n_trace > 0 && n_done > 0)
	u32 n = clib_min (n_trace, n_done);
	ixge_rx_trace (xm, xd, dq,
		       &dq->descriptors[start_descriptor_index], n);
	vlib_set_trace_count (vm, node, n_trace - n);
	_vec_len (d_trace_save) = 0;
	_vec_len (d_trace_buffers) = 0;
  /* Don't keep a reference to b_last if we don't have to.
     Otherwise we can over-write a next_buffer pointer after we have
     already enqueued a packet. */
    b_last->next_buffer = ~0;
  dq->rx.n_descriptors_done_this_call = n_done;
  dq->rx.n_descriptors_done_total += n_done;
  dq->rx.is_start_of_packet = is_sop;
  dq->rx.saved_start_of_packet_buffer_index = bi_sop;
  dq->rx.saved_last_buffer_index = bi_last;
  dq->rx.saved_start_of_packet_next_index = next_index_sop;
  dq->rx.next_index = next_index;
  dq->rx.n_bytes += n_bytes;
ixge_rx_queue (ixge_main_t * xm,
	       vlib_node_runtime_t * node, u32 queue_index)
  ixge_dma_queue_t *dq =
    vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index);
  ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index);
  uword n_packets = 0;
  u32 hw_head_index, sw_head_index;
  /* One time initialization. */
      dq->rx.is_start_of_packet = 1;
      dq->rx.saved_start_of_packet_buffer_index = ~0;
      dq->rx.saved_last_buffer_index = ~0;
  dq->rx.next_index = node->cached_next_index;
  dq->rx.n_descriptors_done_total = 0;
  dq->rx.n_descriptors_done_this_call = 0;
  /* Fetch head from hardware and compare to where we think we are. */
  hw_head_index = dr->head_index;
  sw_head_index = dq->head_index;
  if (hw_head_index == sw_head_index)
  if (hw_head_index < sw_head_index)
      u32 n_tried = dq->n_descriptors - sw_head_index;
      n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
	ixge_ring_add (dq, sw_head_index,
		       dq->rx.n_descriptors_done_this_call);
      if (dq->rx.n_descriptors_done_this_call != n_tried)
  if (hw_head_index >= sw_head_index)
      u32 n_tried = hw_head_index - sw_head_index;
      n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried);
	ixge_ring_add (dq, sw_head_index,
		       dq->rx.n_descriptors_done_this_call);
  dq->head_index = sw_head_index;
    ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total);
  /* Give tail back to hardware. */
  CLIB_MEMORY_BARRIER ();
  dr->tail_index = dq->tail_index;
  vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters
				   + VNET_INTERFACE_COUNTER_RX,
				   0 /* thread_index */ ,
				   xd->vlib_sw_if_index, n_packets,
ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i)
  vlib_main_t *vm = xm->vlib_main;
  ixge_regs_t *r = xd->regs;
      ELOG_TYPE_DECLARE (e) =
	.function = (char *) __FUNCTION__,
	.format = "ixge %d, %s",
	.format_args = "i1t1",
	.n_enum_strings =
	  "link status change",
	  "linksec key exchange",
	  "manageability event",
	  "ecc", "descriptor handler error", "tcp timer", "other",},};
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      u32 v = r->xge_mac.link_status;
      uword is_up = (v & (1 << 30)) != 0;
      ELOG_TYPE_DECLARE (e) =
	.function = (char *) __FUNCTION__,
	.format = "ixge %d, link status change 0x%x",
	.format_args = "i4i4",};
      u32 instance, link_status;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      ed->link_status = v;
      xd->link_status_at_last_link_change = v;
      vlib_process_signal_event (vm, ixge_process_node.index,
				 ((is_up << 31) | xd->vlib_hw_if_index));
clean_block (u32 * b, u32 * t, u32 n_left)
  u32 bi0, bi1, bi2, bi3;
ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index)
  vlib_main_t *vm = xm->vlib_main;
  ixge_dma_queue_t *dq =
    vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index);
  u32 n_clean, *b, *t, *t0;
  i32 n_hw_owned_descriptors;
  i32 first_to_clean, last_to_clean;
  /* Handle case where head write back pointer update
   * arrives after the interrupt during high PCI bus loads.
  while ((dq->head_index == dq->tx.head_index_write_back[0]) &&
	 dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index))
      if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1))
	  ELOG_TYPE_DECLARE (e) =
	    .function = (char *) __FUNCTION__,
	    .format = "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",
	    .format_args =
	  u32 instance, head_index, tail_index, n_buffers_on_ring;
	  ed = ELOG_DATA (&vm->elog_main, e);
	  ed->instance = xd->device_index;
	  ed->head_index = dq->head_index;
	  ed->tail_index = dq->tail_index;
	  ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring;
  dq->head_index = dq->tx.head_index_write_back[0];
  n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index);
  ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors);
  n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors;
  if (IXGE_HWBP_RACE_ELOG && hwbp_race)
      ELOG_TYPE_DECLARE (e) =
	.function = (char *) __FUNCTION__,
	.format = "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",
	.format_args =
      u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->instance = xd->device_index;
      ed->head_index = dq->head_index;
      ed->n_hw_owned_descriptors = n_hw_owned_descriptors;
      ed->n_clean = n_clean;
      ed->retries = hwbp_race;
  /*
   * This function used to wait until hardware owned zero descriptors.
   * At high PPS rates, that doesn't happen until the TX ring is
   * completely full of descriptors which need to be cleaned up.
   * That, in turn, causes TX ring-full drops and/or long RX service
  /* Clean the n_clean descriptors prior to the reported hardware head. */
  last_to_clean = dq->head_index - 1;
  last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors :
  first_to_clean = (last_to_clean) - (n_clean - 1);
  first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors :
  vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
  t0 = t = xm->tx_buffers_pending_free;
  b = dq->descriptor_buffer_indices + first_to_clean;
  /* Wrap case: clean from first to end, then start to last. */
  if (first_to_clean > last_to_clean)
      t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean);
      b = dq->descriptor_buffer_indices;
  /* Typical case: clean from first to last. */
  if (first_to_clean <= last_to_clean)
    t += clean_block (b, t, (last_to_clean - first_to_clean) + 1);
      vlib_buffer_free_no_next (vm, t0, n);
      ASSERT (dq->tx.n_buffers_on_ring >= n);
      dq->tx.n_buffers_on_ring -= n;
      _vec_len (xm->tx_buffers_pending_free) = 0;
/* RX queue interrupts 0 thru 7; TX 8 thru 15. */
ixge_interrupt_is_rx_queue (uword i)

ixge_interrupt_is_tx_queue (uword i)
  return i >= 8 && i < 16;

ixge_tx_queue_to_interrupt (uword i)

ixge_rx_queue_to_interrupt (uword i)

ixge_interrupt_rx_queue (uword i)
  ASSERT (ixge_interrupt_is_rx_queue (i));

ixge_interrupt_tx_queue (uword i)
  ASSERT (ixge_interrupt_is_tx_queue (i));
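#if 0
/* Illustrative mapping, inferred from the comment above the helpers:
   interrupt bits 0-7 correspond to RX queues 0-7 and bits 8-15 to TX
   queues 0-7, so for example */
ASSERT (ixge_interrupt_is_rx_queue (3) && ixge_interrupt_rx_queue (3) == 3);
ASSERT (ixge_interrupt_is_tx_queue (9) && ixge_interrupt_tx_queue (9) == 1);
#endif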
ixge_device_input (ixge_main_t * xm,
		   ixge_device_t * xd, vlib_node_runtime_t * node)
  ixge_regs_t *r = xd->regs;
  uword n_rx_packets = 0;
  s = r->interrupt.status_write_1_to_set;
    r->interrupt.status_write_1_to_clear = s;
  foreach_set_bit (i, s, ({
    if (ixge_interrupt_is_rx_queue (i))
      n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i));
    else if (ixge_interrupt_is_tx_queue (i))
      ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i));
      ixge_interrupt (xm, xd, i);
  return n_rx_packets;
ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
  ixge_main_t *xm = &ixge_main;
  uword n_rx_packets = 0;
  if (node->state == VLIB_NODE_STATE_INTERRUPT)
      /* Loop over devices with interrupts. */
      foreach_set_bit (i, node->runtime_data[0], ({
	xd = vec_elt_at_index (xm->devices, i);
	n_rx_packets += ixge_device_input (xm, xd, node);
	/* Re-enable interrupts since we're going to stay in interrupt mode. */
	if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
	  xd->regs->interrupt.enable_write_1_to_set = ~0;
      /* Clear mask of devices with pending interrupts. */
      node->runtime_data[0] = 0;
      /* Poll all devices for input/interrupts. */
      vec_foreach (xd, xm->devices)
	  n_rx_packets += ixge_device_input (xm, xd, node);
	  /* Re-enable interrupts when switching out of polling mode. */
	      VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
	    xd->regs->interrupt.enable_write_1_to_set = ~0;
  return n_rx_packets;
static char *ixge_error_strings[] = {
VLIB_REGISTER_NODE (ixge_input_node, static) = {
  .function = ixge_input,
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "ixge-input",
  /* Will be enabled if/when hardware is detected. */
  .state = VLIB_NODE_STATE_DISABLED,
  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_ixge_rx_dma_trace,
  .n_errors = IXGE_N_ERROR,
  .error_strings = ixge_error_strings,
  .n_next_nodes = IXGE_RX_N_NEXT,
    [IXGE_RX_NEXT_DROP] = "error-drop",
    [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
    [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input",
    [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input",
VLIB_NODE_FUNCTION_MULTIARCH_CLONE (ixge_input)
CLIB_MULTIARCH_SELECT_FN (ixge_input)
format_ixge_device_name (u8 * s, va_list * args)
  u32 i = va_arg (*args, u32);
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, i);
  vlib_pci_addr_t *addr = vlib_pci_get_addr (xd->pci_dev_handle);
  return format (s, "TenGigabitEthernet%x/%x/%x/%x",
		 addr->domain, addr->bus, addr->slot, addr->function);
#define IXGE_COUNTER_IS_64_BIT (1 << 0)
#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1)

static u8 ixge_counter_flags[] = {
#define _64(a,f) IXGE_COUNTER_IS_64_BIT,
  foreach_ixge_counter
ixge_update_counters (ixge_device_t * xd)
  /* Byte offset for counter registers. */
  static u32 reg_offsets[] = {
#define _(a,f) (a) / sizeof (u32),
#define _64(a,f) _(a,f)
    foreach_ixge_counter
  volatile u32 *r = (volatile u32 *) xd->regs;
  for (i = 0; i < ARRAY_LEN (xd->counters); i++)
      u32 o = reg_offsets[i];
      xd->counters[i] += r[o];
      if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ)
      if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT)
	xd->counters[i] += (u64) r[o + 1] << (u64) 32;
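#if 0
/* Illustrative reconstruction of one 64-bit counter read, as done in the
   loop above: the low 32 bits come from register word r[o], the high 32
   bits from the adjacent word r[o + 1]. */
u64 value = (u64) r[o] + ((u64) r[o + 1] << 32);
#endif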
format_ixge_device_id (u8 * s, va_list * args)
  u32 device_id = va_arg (*args, u32);
#define _(f,n) case n: t = #f; break;
      foreach_ixge_pci_device_id;
      s = format (s, "unknown 0x%x", device_id);
    s = format (s, "%s", t);
format_ixge_link_status (u8 * s, va_list * args)
  ixge_device_t *xd = va_arg (*args, ixge_device_t *);
  u32 v = xd->link_status_at_last_link_change;
  s = format (s, "%s", (v & (1 << 30)) ? "up" : "down");
      "1g", "10g parallel", "10g serial", "autoneg",
      "unknown", "100m", "1g", "10g",
    s = format (s, ", mode %s, speed %s",
		modes[(v >> 26) & 3], speeds[(v >> 28) & 3]);
format_ixge_device (u8 * s, va_list * args)
  u32 dev_instance = va_arg (*args, u32);
  CLIB_UNUSED (int verbose) = va_arg (*args, int);
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance);
  ixge_phy_t *phy = xd->phys + xd->phy_index;
  u32 indent = format_get_indent (s);
  ixge_update_counters (xd);
  xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status;
  s = format (s, "Intel 8259X: id %U\n%Ulink %U",
	      format_ixge_device_id, xd->device_id,
	      format_white_space, indent + 2, format_ixge_link_status, xd);
    vlib_pci_addr_t *addr = vlib_pci_get_addr (xd->pci_dev_handle);
    vlib_pci_device_info_t *d = vlib_pci_get_device_info (addr, 0);
      s = format (s, "\n%UPCIe %U", format_white_space, indent + 2,
		  format_vlib_pci_link_speed, d);
  s = format (s, "\n%U", format_white_space, indent + 2);
  if (phy->mdio_address != ~0)
    s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id);
  else if (xd->sfp_eeprom.id == SFP_ID_sfp)
    s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom);
    s = format (s, "PHY not found");
    ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0);
    ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0);
    u32 hw_head_index = dr->head_index;
    u32 sw_head_index = dq->head_index;
    nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index);
    s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring",
		format_white_space, indent + 2, nitems, dq->n_descriptors);
    s = format (s, "\n%U%d buffers in driver rx cache",
		format_white_space, indent + 2,
		vec_len (xm->rx_buffers_to_add));
    s = format (s, "\n%U%d buffers on tx queue 0 ring",
		format_white_space, indent + 2,
		xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring);
    static char *names[] = {
#define _64(a,f) _(a,f)
      foreach_ixge_counter
    for (i = 0; i < ARRAY_LEN (names); i++)
	v = xd->counters[i] - xd->counters_last_clear[i];
	  s = format (s, "\n%U%-40U%16Ld",
		      format_white_space, indent + 2,
		      format_c_identifier, names[i], v);
ixge_clear_hw_interface_counters (u32 instance)
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd = vec_elt_at_index (xm->devices, instance);
  ixge_update_counters (xd);
  memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters));
 * Dynamically redirect all pkts from a specific interface
 * to the specified node
ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
  ixge_main_t *xm = &ixge_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
  /* Shut off redirection. */
  if (node_index == ~0)
      xd->per_interface_next_index = node_index;
  xd->per_interface_next_index =
    vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index);
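#if 0
/* Illustrative sketch, not driver code: this callback is wired up as
   .rx_redirect_to_node below, so user code redirects RX through the
   generic vnet helper ("my_node_index" is a hypothetical example): */
vnet_hw_interface_rx_redirect_to_node (vnm, hw_if_index, my_node_index);
/* ... and passes ~0 to turn redirection back off. */
vnet_hw_interface_rx_redirect_to_node (vnm, hw_if_index, ~0);
#endif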
VNET_DEVICE_CLASS (ixge_device_class) = {
  .name = "ixge",
  .tx_function = ixge_interface_tx,
  .format_device_name = format_ixge_device_name,
  .format_device = format_ixge_device,
  .format_tx_trace = format_ixge_tx_dma_trace,
  .clear_counters = ixge_clear_hw_interface_counters,
  .admin_up_down_function = ixge_interface_admin_up_down,
  .rx_redirect_to_node = ixge_set_interface_next_node,
};
// DAW-HACK: set the rx buffer size so that any packet below the standard
// Ethernet MTU fits in a single buffer, i.e. every descriptor carries
// both sop and eop.
#define IXGE_N_BYTES_IN_RX_BUFFER (2048)
static clib_error_t *
ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index)
{
  ixge_main_t *xm = &ixge_main;
  vlib_main_t *vm = xm->vlib_main;
  ixge_dma_queue_t *dq;
  clib_error_t *error = 0;

  vec_validate (xd->dma_queues[rt], queue_index);
  dq = vec_elt_at_index (xd->dma_queues[rt], queue_index);

  if (!xm->n_descriptors_per_cache_line)
    xm->n_descriptors_per_cache_line =
      CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]);

  if (!xm->n_bytes_in_rx_buffer)
    xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER;
  xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024);
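  /* Worked example: the default IXGE_N_BYTES_IN_RX_BUFFER of 2048 is
     already a multiple of 1024 and is left unchanged; the buffer size
     field programmed into rx_split_control below is then
     2048 / 1024 = 2 (in 1 KB units, asserted to be < 32). */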
  if (!xm->n_descriptors[rt])
    xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE;

  dq->queue_index = queue_index;
  dq->n_descriptors =
    round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line);
  dq->head_index = dq->tail_index = 0;
  dq->descriptors =
    vlib_physmem_alloc_aligned (vm, xm->physmem_region, &error,
                                dq->n_descriptors *
                                sizeof (dq->descriptors[0]),
                                128 /* per chip spec */ );
  if (error)
    return error;

  memset (dq->descriptors, 0,
          dq->n_descriptors * sizeof (dq->descriptors[0]));
  vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors);

  if (rt == VLIB_RX)
    {
      u32 n_alloc, i;

      n_alloc = vlib_buffer_alloc (vm, dq->descriptor_buffer_indices,
                                   vec_len (dq->descriptor_buffer_indices));
      ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices));
      for (i = 0; i < n_alloc; i++)
        {
          dq->descriptors[i].rx_to_hw.tail_address =
            vlib_get_buffer_data_physical_address
            (vm, dq->descriptor_buffer_indices[i]);
        }
    }
  else
    {
      u32 i;

      dq->tx.head_index_write_back =
        vlib_physmem_alloc (vm, xm->physmem_region, &error,
                            CLIB_CACHE_LINE_BYTES);

      for (i = 0; i < dq->n_descriptors; i++)
        dq->descriptors[i].tx = xm->tx_descriptor_template;

      vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1);
    }
  {
    ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index);
    u64 a;

    a = vlib_physmem_virtual_to_physical (vm, xm->physmem_region,
                                          dq->descriptors);
    dr->descriptor_address[0] = a & 0xFFFFFFFF;
    dr->descriptor_address[1] = a >> (u64) 32;
    dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]);
    dq->head_index = dq->tail_index = 0;
    if (rt == VLIB_RX)
      {
        ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32);
        dr->rx_split_control =
          ( /* buffer size (1k units) */
           ((xm->n_bytes_in_rx_buffer / 1024) << 0)
           | ( /* lo free descriptor threshold (units of 64 descriptors) */
              (1 << 22))
           | ( /* descriptor type: advanced one buffer */
              (1 << 25))
           | ( /* drop if no descriptors available */
              (1 << 28)));

        /* Give hardware all but the last 16 cache lines' worth of
           descriptors. */
        dq->tail_index = dq->n_descriptors -
          16 * xm->n_descriptors_per_cache_line;
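        /* Worked example (assuming 64-byte cache lines and 16-byte
           advanced descriptors): n_descriptors_per_cache_line is 4, so
           hardware is handed all but 64 slots; with the default
           4 * VLIB_FRAME_SIZE = 1024 descriptors, tail_index starts
           at 960. */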
      }
    else
      {
        /* Make sure it's initialized before hardware can get to it. */
        dq->tx.head_index_write_back[0] = dq->head_index;

        a = vlib_physmem_virtual_to_physical (vm, xm->physmem_region,
                                              dq->tx.head_index_write_back);
        dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a;
        dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32;
      }
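    /* Design note: with head write-back enabled the chip DMAs its
       current tx head index into this cache line, so the tx cleanup
       path can read it from host memory instead of doing an expensive
       uncached register read; bit 0 of the low address word is the
       write-back enable bit. */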
    /* DMA on 82599 does not work with [13] rx data write relaxed ordering
       and [12] undocumented set. */
    if (rt == VLIB_RX)
      dr->dca_control &= ~((1 << 13) | (1 << 12));

    CLIB_MEMORY_BARRIER ();

    if (rt == VLIB_TX)
      {
        xd->regs->tx_dma_control |= (1 << 0);
        dr->control |= ((32 << 0)   /* prefetch threshold */
                        | (64 << 8) /* host threshold */
                        | (0 << 16) /* writeback threshold */ );
      }

    /* Enable this queue and wait for hardware to initialize
       before adding to tail. */
    if (rt == VLIB_TX)
      {
        dr->control |= 1 << 25;
        while (!(dr->control & (1 << 25)))
          ;
      }

    /* Set head/tail indices and enable DMA. */
    dr->head_index = dq->head_index;
    dr->tail_index = dq->tail_index;
  }

  return error;
}
static u32
ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
{
  ixge_device_t *xd;
  ixge_regs_t *r;
  u32 old;
  ixge_main_t *xm = &ixge_main;

  xd = vec_elt_at_index (xm->devices, hw->dev_instance);
  r = xd->regs;

  old = r->filter_control;

  if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
    r->filter_control = old | (1 << 9) /* unicast promiscuous */ ;
  else
    r->filter_control = old & ~(1 << 9);

  return old;
}
static void
ixge_device_init (ixge_main_t * xm)
{
  vnet_main_t *vnm = vnet_get_main ();
  ixge_device_t *xd;

  /* Reset chip(s). */
  vec_foreach (xd, xm->devices)
  {
    ixge_regs_t *r = xd->regs;
    const u32 reset_bit = (1 << 26) | (1 << 3);

    r->control |= reset_bit;

    /* No need to suspend: reset is timed to take ~1e-6 secs. */
    while (r->control & reset_bit)
      ;

    /* Software loaded. */
    r->extended_control |= (1 << 28);

    ixge_phy_init (xd);
    /* Register ethernet interface. */
    {
      u8 addr8[6];
      u32 i, addr32[2];
      clib_error_t *error;

      addr32[0] = r->rx_ethernet_address0[0][0];
      addr32[1] = r->rx_ethernet_address0[0][1];
      for (i = 0; i < 6; i++)
        addr8[i] = addr32[i / 4] >> ((i % 4) * 8);
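      /* Example: addr32[0] = 0x33221100 and addr32[1] = 0x00005544
         yield addr8[] = { 00, 11, 22, 33, 44, 55 } (hex), i.e. the chip
         stores the station address in little-endian byte order. */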
      error = ethernet_register_interface
        (vnm, ixge_device_class.index, xd->device_index,
         /* ethernet address */ addr8,
         &xd->vlib_hw_if_index, ixge_flag_change);
      if (error)
        clib_error_report (error);
    }

    {
      vnet_sw_interface_t *sw =
        vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index);
      xd->vlib_sw_if_index = sw->sw_if_index;
    }
    ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0);

    xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE;

    ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0);

    /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */
    r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) |
                                      ixge_rx_queue_to_interrupt (0)) << 0);

    r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) |
                                       ixge_tx_queue_to_interrupt (0)) << 8);
    /* No use in getting too many interrupts: limit them to one per 3/4
       ring's worth of minimum-size packets at line rate.  Left disabled
       since the kernel / vlib main loop already provides adequate
       interrupt rate limiting. */
    if (0)
      {
        f64 line_rate_max_pps =
          10e9 / (8 * (64 + /* interframe padding */ 20));
        ixge_throttle_queue_interrupt (r, 0,
                                       .75 * xm->n_descriptors[VLIB_RX] /
                                       line_rate_max_pps);
      }
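    /* Worked numbers: a minimum 64-byte frame plus 20 bytes of preamble
       and interframe gap is 84 bytes = 672 bits, so line_rate_max_pps
       is roughly 10e9 / 672 ~= 14.9 Mpps; with 1024 rx descriptors the
       throttle interval would be .75 * 1024 / 14.9e6 ~= 52 usecs. */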
    /* Accept all multicast and broadcast packets.  Should really add them
       to the dst_ethernet_address register array. */
    r->filter_control |= (1 << 10) | (1 << 8);

    /* Enable frames up to the size in the mac frame size register:
       9216-byte jumbo payload plus 14-byte ethernet header, stored in
       the upper 16 bits. */
    r->xge_mac.control |= 1 << 2;
    r->xge_mac.rx_max_frame_size = (9216 + 14) << 16;

    /* Enable all interrupts. */
    if (!IXGE_ALWAYS_POLL)
      r->interrupt.enable_write_1_to_set = ~0;
  }
}
static uword
ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  vnet_main_t *vnm = vnet_get_main ();
  ixge_main_t *xm = &ixge_main;
  ixge_device_t *xd;
  uword event_type, *event_data = 0;
  f64 timeout, link_debounce_deadline;

  ixge_device_init (xm);

  /* Clear all counters. */
  vec_foreach (xd, xm->devices)
  {
    ixge_update_counters (xd);
    memset (xd->counters, 0, sizeof (xd->counters));
  }

  timeout = 30.0;
  link_debounce_deadline = 1e70;

  while (1)
    {
      /* 36 bit stat counters could overflow in ~50 secs.
         We poll every 30 secs to be conservative. */
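      /* Sanity check on that figure: a 36-bit octet counter wraps after
         2^36 ~= 68.7e9 bytes, which at 10 Gb/s (1.25e9 bytes/sec) is
         ~55 seconds. */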
      vlib_process_wait_for_event_or_clock (vm, timeout);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
        {
        case EVENT_SET_FLAGS:
          /* Debounce link changes for 1 ms. */
          link_debounce_deadline = vlib_time_now (vm) + 1e-3;
          timeout = 1e-3;
          break;

        case ~0:
          /* No events found: timer expired. */
          if (vlib_time_now (vm) > link_debounce_deadline)
            {
              vec_foreach (xd, xm->devices)
              {
                ixge_regs_t *r = xd->regs;
                u32 v = r->xge_mac.link_status;
                uword is_up = (v & (1 << 30)) != 0;

                vnet_hw_interface_set_flags
                  (vnm, xd->vlib_hw_if_index,
                   is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
              }
              link_debounce_deadline = 1e70;
              timeout = 30.0;
            }
          break;

        default:
          ASSERT (0);
          break;
        }

      if (event_data)
        _vec_len (event_data) = 0;
      /* Query stats every 30 secs. */
      {
        f64 now = vlib_time_now (vm);
        if (now - xm->time_last_stats_update > 30)
          {
            xm->time_last_stats_update = now;
            vec_foreach (xd, xm->devices) ixge_update_counters (xd);
          }
      }
    }

  return 0;
}
static vlib_node_registration_t ixge_process_node = {
  .function = ixge_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ixge-process",
};
clib_error_t *
ixge_init (vlib_main_t * vm)
{
  ixge_main_t *xm = &ixge_main;
  clib_error_t *error;

  xm->vlib_main = vm;
  memset (&xm->tx_descriptor_template, 0,
          sizeof (xm->tx_descriptor_template));
  memset (&xm->tx_descriptor_template_mask, 0,
          sizeof (xm->tx_descriptor_template_mask));
  xm->tx_descriptor_template.status0 =
    (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED |
     IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED |
     IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS);
  xm->tx_descriptor_template_mask.status0 = 0xffff;
  xm->tx_descriptor_template_mask.status1 = 0x00003fff;

  xm->tx_descriptor_template_mask.status0 &=
    ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET
      | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS);
  xm->tx_descriptor_template_mask.status1 &=
    ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE);
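  /* Design note: the template seeds every tx descriptor when a ring is
     created in ixge_dma_init; the mask selects the status bits expected
     to match the template when descriptors are recycled, so the
     per-packet bits (end-of-packet, report-status, done) are excluded
     from it. */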
  error = vlib_call_init_function (vm, pci_bus_init);

  return error;
}
VLIB_INIT_FUNCTION (ixge_init);
static void
ixge_pci_intr_handler (vlib_pci_dev_handle_t h)
{
  ixge_main_t *xm = &ixge_main;
  vlib_main_t *vm = xm->vlib_main;
  uword private_data = vlib_pci_get_private_data (h);

  vlib_node_set_interrupt_pending (vm, ixge_input_node.index);

  /* Let the input node know which device is interrupting. */
  {
    vlib_node_runtime_t *rt =
      vlib_node_get_runtime (vm, ixge_input_node.index);
    rt->runtime_data[0] |= 1 << private_data;
  }
}
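/* Note: using one bit of runtime_data[0] per device limits this scheme
   to as many devices as there are bits in a uword (32 or 64). */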
static clib_error_t *
ixge_pci_init (vlib_main_t * vm, vlib_pci_dev_handle_t h)
{
  ixge_main_t *xm = &ixge_main;
  clib_error_t *error = 0;
  void *r;
  ixge_device_t *xd;
  vlib_pci_addr_t *addr = vlib_pci_get_addr (h);
  vlib_pci_device_info_t *d = vlib_pci_get_device_info (addr, 0);

  /* Allocate physmem region for DMA buffers. */
  if (xm->physmem_region_allocated == 0)
    {
      error = vlib_physmem_region_alloc (vm, "ixge descriptors", 2 << 20, 0,
                                         VLIB_PHYSMEM_F_INIT_MHEAP,
                                         &xm->physmem_region);
      xm->physmem_region_allocated = 1;
    }
  if (error)
    return error;

  error = vlib_pci_map_resource (h, 0, &r);
  if (error)
    return error;
  vec_add2 (xm->devices, xd, 1);

  if (vec_len (xm->devices) == 1)
    {
      ixge_input_node.function = ixge_input_multiarch_select ();
    }

  xd->pci_dev_handle = h;
  xd->device_id = d->device_id;
  xd->regs = r;
  xd->device_index = xd - xm->devices;
  xd->pci_function = addr->function;
  xd->per_interface_next_index = ~0;

  vlib_pci_set_private_data (h, xd->device_index);

  /* Chip found so enable node. */
  vlib_node_set_state (vm, ixge_input_node.index,
                       (IXGE_ALWAYS_POLL
                        ? VLIB_NODE_STATE_POLLING
                        : VLIB_NODE_STATE_INTERRUPT));
  /* Register the process node once, on first device discovery. */
  if (vec_len (xm->devices) == 1)
    {
      vlib_register_node (vm, &ixge_process_node);
      xm->process_node_index = ixge_process_node.index;
    }

  error = vlib_pci_bus_master_enable (h);
  if (error)
    return error;

  return vlib_pci_intr_enable (h);
}
PCI_REGISTER_DEVICE (ixge_pci_device_registration, static) = {
  .init_function = ixge_pci_init,
  .interrupt_handler = ixge_pci_intr_handler,
  .supported_devices = {
#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, },
    foreach_ixge_pci_device_id
#undef _
    { 0 },
  },
};
void
ixge_set_next_node (ixge_rx_next_t next, char *name)
{
  vlib_node_registration_t *r = &ixge_input_node;

  switch (next)
    {
    case IXGE_RX_NEXT_IP4_INPUT:
    case IXGE_RX_NEXT_IP6_INPUT:
    case IXGE_RX_NEXT_ETHERNET_INPUT:
      r->next_nodes[next] = name;
      break;

    default:
      clib_warning ("%s: illegal next %d\n", __FUNCTION__, next);
      break;
    }
}
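/* Example (hypothetical override): hand ip4 packets straight to the
   ip4-input node instead of going through ethernet-input:

     ixge_set_next_node (IXGE_RX_NEXT_IP4_INPUT, "ip4-input");
*/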
VLIB_PLUGIN_REGISTER () = {
  .version = VPP_BUILD_VER,
  .default_disabled = 1,
  .description = "Intel 82599 Family Native Driver (experimental)",
};

#endif /* __x86_64__ || __i386__ */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */