/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"
#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
			rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
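
/*
 * Round-robin receive: poll each active slave in turn, starting with the
 * slave that follows the one polled first on the previous call, until the
 * requested burst is filled or every slave has been polled once.
 */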
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					bd_rx_q->queue_id,
					bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave == slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}
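
/*
 * A frame is treated as a LACP/marker control frame only if it is untagged
 * and carries the slow-protocols EtherType with a LACP or marker subtype.
 */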
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};
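
/*
 * Validate that a flow rule steering slow-protocol frames to the dedicated
 * LACP queue can be created on the slave, and that the slave has enough
 * Rx/Tx queues to carry the bonded device's data queues plus the extra
 * control queue pair.
 */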
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}
int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that every slave in the bonding supports steering slow
	 * packets to a dedicated hardware queue */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
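
/*
 * Mode 4 receive path when the dedicated control queue is enabled: slow
 * frames are already steered to their own queue in hardware, so the data
 * queues can be polled round-robin with no software filtering.
 */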
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
				bd_rx_q->queue_id,
				bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave == slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
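
/*
 * Mode 4 receive path without the dedicated control queue: every received
 * burst must be scanned in software so that LACP/marker frames can be
 * diverted to the mode 4 state machines, and so that data frames from
 * slaves that are not collecting (or that are not addressed to us when not
 * in promiscuous mode) can be dropped.
 */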
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;	/* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
				COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is slow packet or slave is not
			 * in collecting state or bonding interface is not in promiscuous
			 * mode and packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
						internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave == slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}

	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	RTE_LOG(DEBUG, PMD, \
		"%s port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the slave mbuf arrays with the packets to be sent on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}
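
/*
 * Transmit hash policies for the balance and 802.3ad modes: xmit_l2_hash
 * balances on the Ethernet addresses only, xmit_l23_hash also folds in the
 * IPv4/IPv6 addresses, and xmit_l34_hash additionally folds in the TCP/UDP
 * ports (skipping fragmented IPv4 packets, which carry no L4 header). The
 * folded hash is reduced modulo slave_count to pick the output slave.
 */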
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}
uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}
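
/*
 * Mode 5 (TLB) slave ordering: each slave's unused transmit bandwidth is
 * estimated from its link speed and recent byte counters, and the slaves
 * are kept sorted so transmission always starts on the least loaded one.
 */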
struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}
static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;
	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}
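
/*
 * Mode 5 (TLB) transmit: walk the slaves in the pre-computed bandwidth
 * order, rewriting the source MAC of packets that still carry the primary
 * slave's address so each packet leaves with the address of the slave that
 * actually transmits it.
 */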
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
			sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}
void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
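
/*
 * Mode 6 (ALB) transmit: ARP packets are rewritten and pinned to a slave
 * chosen by the ALB logic so that each client keeps talking to one slave;
 * all other traffic falls back to the TLB policy. ARP update packets
 * generated for known clients are sent alongside the burst but are not
 * counted in the return value.
 */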
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx = 0;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with the packets which are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
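
/*
 * Mode 4 transmit without the dedicated control queue: LACP/marker frames
 * queued by the state machines are drained from each slave's tx_ring and
 * sent ahead of the data packets, which are distributed by the configured
 * transmit hash across the slaves currently in the DISTRIBUTING state.
 */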
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional packets in case 8023AD mode. */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slaves mbuf with the packets which are to be sent on it */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}
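
/*
 * Broadcast mode: each packet's reference count is raised so the same mbuf
 * can be handed to every active slave; the burst is then transmitted on
 * all slaves and the best per-slave result is reported to the caller.
 */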
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t tx_failed_flag = 0, num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}
void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}
int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}
int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}
int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}
int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}
int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
	struct bond_dev_private *internals;

	internals = eth_dev->data->dev_private;

	switch (mode) {
	case BONDING_MODE_ROUND_ROBIN:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_BALANCE:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_BROADCAST:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
		break;
	case BONDING_MODE_8023AD:
		if (bond_mode_8023ad_enable(eth_dev) != 0)
			return -1;

		if (internals->mode4.dedicated_queues.enabled == 0) {
			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
			RTE_LOG(WARNING, PMD,
				"Using mode 4, it is necessary to do TX burst "
				"and RX burst at least every 100ms.\n");
		} else {
			/* Use flow director's optimization */
			eth_dev->rx_pkt_burst =
					bond_ethdev_rx_burst_8023ad_fast_queue;
			eth_dev->tx_pkt_burst =
					bond_ethdev_tx_burst_8023ad_fast_queue;
		}
		break;
	case BONDING_MODE_TLB:
		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
		break;
	case BONDING_MODE_ALB:
		if (bond_mode_alb_enable(eth_dev) != 0)
			return -1;

		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
		break;
	default:
		return -1;
	}

	internals->mode = mode;

	return 0;
}
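
/*
 * Create the per-slave mempool used for LACP control frames and, when the
 * dedicated control queue is enabled, set up the extra Rx/Tx queue pair
 * that carries those frames.
 */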
static int
slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	int errval = 0;
	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;
	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];

	if (port->slow_pool == NULL) {
		char mem_name[256];
		int slave_id = slave_eth_dev->data->port_id;

		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
				slave_id);
		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			slave_eth_dev->data->numa_node);

		/* Any memory allocation failure in initialization is critical because
		 * resources can't be freed, so reinitialization is impossible. */
		if (port->slow_pool == NULL) {
			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
				slave_id, mem_name, rte_strerror(rte_errno));
		}
	}

	if (internals->mode4.dedicated_queues.enabled == 1) {
		/* Configure slow Rx queue */

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid, 128,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL, port->slow_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.rx_qid,
				errval);
			return errval;
		}

		/* Configure slow Tx queue */
		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid, 512,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				NULL);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
				slave_eth_dev->data->port_id,
				internals->mode4.dedicated_queues.tx_qid,
				errval);
			return errval;
		}
	}

	return 0;
}
int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_rx_queue *bd_rx_q;
	struct bond_tx_queue *bd_tx_q;
	uint16_t nb_rx_queues;
	uint16_t nb_tx_queues;

	int errval;
	uint16_t q_id;
	struct rte_flow_error flow_error;

	struct bond_dev_private *internals = (struct bond_dev_private *)
		bonded_eth_dev->data->dev_private;

	/* Stop slave */
	rte_eth_dev_stop(slave_eth_dev->data->port_id);

	/* Enable interrupts on slave device if supported */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;

	/* If RSS is enabled for bonding, try to enable it for slaves */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
		if (internals->rss_key_len != 0) {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
					internals->rss_key_len;
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
					internals->rss_key;
		} else {
			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
		}

		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
	}

	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;

	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			nb_rx_queues++;
			nb_tx_queues++;
		}
	}

	/* Configure device */
	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
			nb_rx_queues, nb_tx_queues,
			&(slave_eth_dev->data->dev_conf));
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return errval;
	}

	/* Setup Rx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];

		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_rx_q->nb_rx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	/* Setup Tx Queues */
	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];

		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
				bd_tx_q->nb_tx_desc,
				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
				&bd_tx_q->tx_conf);
		if (errval != 0) {
			RTE_BOND_LOG(ERR,
					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
					slave_eth_dev->data->port_id, q_id, errval);
			return errval;
		}
	}

	if (internals->mode == BONDING_MODE_8023AD &&
			internals->mode4.dedicated_queues.enabled == 1) {
		if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
				!= 0)
			return -1;

		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d",
				slave_eth_dev->data->port_id);
			return -1;
		}

		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
			rte_flow_destroy(slave_eth_dev->data->port_id,
					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
					&flow_error);

		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
				slave_eth_dev->data->port_id);
	}

	/* Start device */
	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
	if (errval != 0) {
		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
				slave_eth_dev->data->port_id, errval);
		return -1;
	}

	/* If RSS is enabled for bonding, synchronize RETA */
	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		int i;
		struct bond_dev_private *internals;

		internals = bonded_eth_dev->data->dev_private;

		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
				errval = rte_eth_dev_rss_reta_update(
						slave_eth_dev->data->port_id,
						&internals->reta_conf[0],
						internals->slaves[i].reta_size);
				if (errval != 0) {
					RTE_LOG(WARNING, PMD,
						"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
						" RSS Configuration for bonding may be inconsistent.\n",
						slave_eth_dev->data->port_id, errval);
				}
				break;
			}
		}
	}

	/* If lsc interrupt is set, check initial slave's link status */
	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
			NULL);
	}

	return 0;
}
static void
slave_remove(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	uint16_t i;

	for (i = 0; i < internals->slave_count; i++)
		if (internals->slaves[i].port_id ==
				slave_eth_dev->data->port_id)
			break;

	if (i < (internals->slave_count - 1))
		memmove(&internals->slaves[i], &internals->slaves[i + 1],
				sizeof(internals->slaves[0]) *
				(internals->slave_count - i - 1));

	internals->slave_count--;

	/* force reconfiguration of slave interfaces */
	_rte_eth_dev_reset(slave_eth_dev);
}
static void
bond_ethdev_slave_link_status_change_monitor(void *cb_arg);

void
slave_add(struct bond_dev_private *internals,
		struct rte_eth_dev *slave_eth_dev)
{
	struct bond_slave_details *slave_details =
			&internals->slaves[internals->slave_count];

	slave_details->port_id = slave_eth_dev->data->port_id;
	slave_details->last_link_status = 0;

	/* Mark slave devices that don't support interrupts so we can
	 * compensate when we start the bond
	 */
	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
		slave_details->link_status_poll_enabled = 1;
	}

	slave_details->link_status_wait_to_complete = 0;
	/* Save the slave's current MAC address so it can be restored when the
	 * slave is removed from the bonded device */
	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
			sizeof(struct ether_addr));
}
void
bond_ethdev_primary_set(struct bond_dev_private *internals,
		uint16_t slave_port_id)
{
	int i;

	if (internals->active_slave_count < 1)
		internals->current_primary_port = slave_port_id;
	else
		/* Search bonded device slave ports for new proposed primary port */
		for (i = 0; i < internals->active_slave_count; i++) {
			if (internals->active_slaves[i] == slave_port_id)
				internals->current_primary_port = slave_port_id;
		}
}
static void
bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
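
/*
 * Start the bonded device: apply the bonded MAC and promiscuous settings,
 * reconfigure and start every slave, begin link-status polling for slaves
 * without LSC interrupt support, and kick off the mode-specific machinery
 * (802.3ad state machines, TLB bandwidth callback).
 */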
static int
bond_ethdev_start(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals;
	int i;

	/* slave eth dev will be started by bonded device */
	if (check_for_bonded_ethdev(eth_dev)) {
		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
				eth_dev->data->port_id);
		return -1;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 1;

	internals = eth_dev->data->dev_private;

	if (internals->slave_count == 0) {
		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
		goto out_err;
	}

	if (internals->user_defined_mac == 0) {
		struct ether_addr *new_mac_addr = NULL;

		for (i = 0; i < internals->slave_count; i++)
			if (internals->slaves[i].port_id == internals->primary_port)
				new_mac_addr = &internals->slaves[i].persisted_mac_addr;

		if (new_mac_addr == NULL)
			goto out_err;

		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
					eth_dev->data->port_id);
			goto out_err;
		}
	}

	/* If bonded device is configured in promiscuous mode then re-apply config */
	if (internals->promiscuous_en)
		bond_ethdev_promiscuous_enable(eth_dev);

	if (internals->mode == BONDING_MODE_8023AD) {
		if (internals->mode4.dedicated_queues.enabled == 1) {
			internals->mode4.dedicated_queues.rx_qid =
					eth_dev->data->nb_rx_queues;
			internals->mode4.dedicated_queues.tx_qid =
					eth_dev->data->nb_tx_queues;
		}
	}

	/* Reconfigure each slave device if starting bonded device */
	for (i = 0; i < internals->slave_count; i++) {
		struct rte_eth_dev *slave_ethdev =
				&(rte_eth_devices[internals->slaves[i].port_id]);
		if (slave_configure(eth_dev, slave_ethdev) != 0) {
			RTE_BOND_LOG(ERR,
				"bonded port (%d) failed to reconfigure slave device (%d)",
				eth_dev->data->port_id,
				internals->slaves[i].port_id);
			goto out_err;
		}
		/* We will need to poll for link status if any slave doesn't
		 * support interrupts
		 */
		if (internals->slaves[i].link_status_poll_enabled)
			internals->link_status_polling_enabled = 1;
	}

	/* start polling if needed */
	if (internals->link_status_polling_enabled) {
		rte_eal_alarm_set(
			internals->link_status_polling_interval_ms * 1000,
			bond_ethdev_slave_link_status_change_monitor,
			(void *)&rte_eth_devices[internals->port_id]);
	}

	/* Update all slave devices MACs */
	if (mac_address_slaves_update(eth_dev) != 0)
		goto out_err;

	if (internals->user_defined_primary_port)
		bond_ethdev_primary_set(internals, internals->primary_port);

	if (internals->mode == BONDING_MODE_8023AD)
		bond_mode_8023ad_start(eth_dev);

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB)
		bond_tlb_enable(internals);

	return 0;

out_err:
	eth_dev->data->dev_started = 0;
	return -1;
}
static void
bond_ethdev_free_queues(struct rte_eth_dev *dev)
{
	uint8_t i;

	if (dev->data->rx_queues != NULL) {
		for (i = 0; i < dev->data->nb_rx_queues; i++) {
			rte_free(dev->data->rx_queues[i]);
			dev->data->rx_queues[i] = NULL;
		}
		dev->data->nb_rx_queues = 0;
	}

	if (dev->data->tx_queues != NULL) {
		for (i = 0; i < dev->data->nb_tx_queues; i++) {
			rte_free(dev->data->tx_queues[i]);
			dev->data->tx_queues[i] = NULL;
		}
		dev->data->nb_tx_queues = 0;
	}
}
void
bond_ethdev_stop(struct rte_eth_dev *eth_dev)
{
	struct bond_dev_private *internals = eth_dev->data->dev_private;
	uint8_t i;

	if (internals->mode == BONDING_MODE_8023AD) {
		struct port *port;
		void *pkt = NULL;

		bond_mode_8023ad_stop(eth_dev);

		/* Discard all messages to/from mode 4 state machines */
		for (i = 0; i < internals->active_slave_count; i++) {
			port = &mode_8023ad_ports[internals->active_slaves[i]];

			RTE_ASSERT(port->rx_ring != NULL);
			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);

			RTE_ASSERT(port->tx_ring != NULL);
			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
				rte_pktmbuf_free(pkt);
		}
	}

	if (internals->mode == BONDING_MODE_TLB ||
			internals->mode == BONDING_MODE_ALB) {
		bond_tlb_disable(internals);
		for (i = 0; i < internals->active_slave_count; i++)
			tlb_last_obytets[internals->active_slaves[i]] = 0;
	}

	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
	eth_dev->data->dev_started = 0;

	internals->link_status_polling_enabled = 0;
	for (i = 0; i < internals->slave_count; i++) {
		uint16_t slave_id = internals->slaves[i].port_id;
		if (find_slave_by_id(internals->active_slaves,
				internals->active_slave_count, slave_id) !=
						internals->active_slave_count) {
			internals->slaves[i].last_link_status = 0;
			rte_eth_dev_stop(slave_id);
			deactivate_slave(eth_dev, slave_id);
		}
	}
}
2083 bond_ethdev_close(struct rte_eth_dev *dev)
2085 struct bond_dev_private *internals = dev->data->dev_private;
2086 uint8_t bond_port_id = internals->port_id;
2089 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
2090 while (internals->slave_count != skipped) {
2091 uint16_t port_id = internals->slaves[skipped].port_id;
2093 rte_eth_dev_stop(port_id);
2095 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2097 "Failed to remove port %d from bonded device "
2098 "%s\n", port_id, dev->device->name);
2102 bond_ethdev_free_queues(dev);
2103 rte_bitmap_reset(internals->vlan_filter_bmp);
2106 /* forward declaration */
2107 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2110 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2112 struct bond_dev_private *internals = dev->data->dev_private;
2114 uint16_t max_nb_rx_queues = UINT16_MAX;
2115 uint16_t max_nb_tx_queues = UINT16_MAX;
2117 dev_info->max_mac_addrs = 1;
2119 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2120 internals->candidate_max_rx_pktlen :
2121 ETHER_MAX_JUMBO_FRAME_LEN;
2123 /* The max number of tx/rx queues that the bonded device can support is
2124 * the minimum of the values reported by the bonded slaves, as all slaves
2125 * must be capable of supporting the same number of tx/rx queues.
2127 if (internals->slave_count > 0) {
2128 struct rte_eth_dev_info slave_info;
2131 for (idx = 0; idx < internals->slave_count; idx++) {
2132 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2135 if (slave_info.max_rx_queues < max_nb_rx_queues)
2136 max_nb_rx_queues = slave_info.max_rx_queues;
2138 if (slave_info.max_tx_queues < max_nb_tx_queues)
2139 max_nb_tx_queues = slave_info.max_tx_queues;
2143 dev_info->max_rx_queues = max_nb_rx_queues;
2144 dev_info->max_tx_queues = max_nb_tx_queues;
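/* Example (hypothetical capabilities): a slave reporting 16 max rx queues
 * bonded with a slave reporting 8 results in the bonded device
 * advertising 8. */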
2147 * If dedicated HW queues are enabled for the link bonding device in LACP
2148 * mode then we need to reduce the maximum number of data path queues by 1.
2150 if (internals->mode == BONDING_MODE_8023AD &&
2151 internals->mode4.dedicated_queues.enabled == 1) {
2152 dev_info->max_rx_queues--;
2153 dev_info->max_tx_queues--;
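/* E.g. if the minimum across slaves is 8 queues, applications see 7
 * usable data-path queues; the last one is reserved for the dedicated
 * LACP control path. */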
2156 dev_info->min_rx_bufsize = 0;
2158 dev_info->rx_offload_capa = internals->rx_offload_capa;
2159 dev_info->tx_offload_capa = internals->tx_offload_capa;
2160 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2162 dev_info->reta_size = internals->reta_size;
2166 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2170 struct bond_dev_private *internals = dev->data->dev_private;
2172 /* don't do this while a slave is being added */
2173 rte_spinlock_lock(&internals->lock);
2176 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2178 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2180 for (i = 0; i < internals->slave_count; i++) {
2181 uint16_t port_id = internals->slaves[i].port_id;
2183 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2185 RTE_LOG(WARNING, PMD,
2186 "Setting VLAN filter on slave port %u not supported.\n",
2190 rte_spinlock_unlock(&internals->lock);
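/* Propagation is best-effort: a slave that cannot apply the VLAN filter
 * only triggers the warning above and does not fail the call. */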
2195 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2196 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2197 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2199 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2200 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2201 0, dev->data->numa_node);
2202 if (bd_rx_q == NULL)
2205 bd_rx_q->queue_id = rx_queue_id;
2206 bd_rx_q->dev_private = dev->data->dev_private;
2208 bd_rx_q->nb_rx_desc = nb_rx_desc;
2210 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2211 bd_rx_q->mb_pool = mb_pool;
2213 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2219 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2220 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2221 const struct rte_eth_txconf *tx_conf)
2223 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2224 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2225 0, dev->data->numa_node);
2227 if (bd_tx_q == NULL)
2230 bd_tx_q->queue_id = tx_queue_id;
2231 bd_tx_q->dev_private = dev->data->dev_private;
2233 bd_tx_q->nb_tx_desc = nb_tx_desc;
2234 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2236 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
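/* Nothing is programmed into hardware here; the stored descriptor count
 * and config are replayed onto each slave's queues by slave_configure()
 * when the bonded device is (re)started. */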
2242 bond_ethdev_rx_queue_release(void *queue)
2251 bond_ethdev_tx_queue_release(void *queue)
2260 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2262 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2263 struct bond_dev_private *internals;
2265 /* Default value for "polling slave found" is true, as we don't want to
2266 * disable the polling thread if we cannot acquire the lock */
2267 int i, polling_slave_found = 1;
2272 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2273 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2275 if (!bonded_ethdev->data->dev_started ||
2276 !internals->link_status_polling_enabled)
2279 /* If the device is currently being configured then don't check slave link
2280 * status; wait until the next period */
2281 if (rte_spinlock_trylock(&internals->lock)) {
2282 if (internals->slave_count > 0)
2283 polling_slave_found = 0;
2285 for (i = 0; i < internals->slave_count; i++) {
2286 if (!internals->slaves[i].link_status_poll_enabled)
2289 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2290 polling_slave_found = 1;
2292 /* Update slave link status */
2293 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2294 internals->slaves[i].link_status_wait_to_complete);
2296 /* if link status has changed since last checked then call lsc event callback */
2298 if (slave_ethdev->data->dev_link.link_status !=
2299 internals->slaves[i].last_link_status) {
2300 internals->slaves[i].last_link_status =
2301 slave_ethdev->data->dev_link.link_status;
2303 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2304 RTE_ETH_EVENT_INTR_LSC,
2305 &bonded_ethdev->data->port_id,
2309 rte_spinlock_unlock(&internals->lock);
2312 if (polling_slave_found)
2313 /* Set alarm to continue monitoring link status of slave ethdev's */
2314 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2315 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2319 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2321 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2323 struct bond_dev_private *bond_ctx;
2324 struct rte_eth_link slave_link;
2328 bond_ctx = ethdev->data->dev_private;
2330 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2332 if (ethdev->data->dev_started == 0 ||
2333 bond_ctx->active_slave_count == 0) {
2334 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2338 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2340 if (wait_to_complete)
2341 link_update = rte_eth_link_get;
2343 link_update = rte_eth_link_get_nowait;
2345 switch (bond_ctx->mode) {
2346 case BONDING_MODE_BROADCAST:
2348 * Setting link speed to UINT32_MAX to ensure we pick up the
2349 * value of the first active slave
2351 ethdev->data->dev_link.link_speed = UINT32_MAX;
2354 * link speed is the minimum value of all the slaves' link speeds, as
2355 * packet loss will occur on the slowest slave if transmission at rates
2356 * greater than that are attempted
2358 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2359 link_update(bond_ctx->active_slaves[idx], &slave_link);
2361 if (slave_link.link_speed <
2362 ethdev->data->dev_link.link_speed)
2363 ethdev->data->dev_link.link_speed =
2364 slave_link.link_speed;
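/* Example (hypothetical speeds): slaves at 10G and 1G yield a reported
 * broadcast-mode link speed of 1G. */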
2367 case BONDING_MODE_ACTIVE_BACKUP:
2368 /* Current primary slave */
2369 link_update(bond_ctx->current_primary_port, &slave_link);
2371 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2373 case BONDING_MODE_8023AD:
2374 ethdev->data->dev_link.link_autoneg =
2375 bond_ctx->mode4.slave_link.link_autoneg;
2376 ethdev->data->dev_link.link_duplex =
2377 bond_ctx->mode4.slave_link.link_duplex;
2378 /* fall through to update link speed */
2379 case BONDING_MODE_ROUND_ROBIN:
2380 case BONDING_MODE_BALANCE:
2381 case BONDING_MODE_TLB:
2382 case BONDING_MODE_ALB:
2385 * In these modes the maximum theoretical link speed is the sum of all the slaves' link speeds
2388 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2390 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2391 link_update(bond_ctx->active_slaves[idx], &slave_link);
2393 ethdev->data->dev_link.link_speed +=
2394 slave_link.link_speed;
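/* Example (hypothetical speeds): four active 10G slaves yield a reported
 * aggregate link speed of 40G. */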
2404 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2406 struct bond_dev_private *internals = dev->data->dev_private;
2407 struct rte_eth_stats slave_stats;
2410 for (i = 0; i < internals->slave_count; i++) {
2411 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2413 stats->ipackets += slave_stats.ipackets;
2414 stats->opackets += slave_stats.opackets;
2415 stats->ibytes += slave_stats.ibytes;
2416 stats->obytes += slave_stats.obytes;
2417 stats->imissed += slave_stats.imissed;
2418 stats->ierrors += slave_stats.ierrors;
2419 stats->oerrors += slave_stats.oerrors;
2420 stats->rx_nombuf += slave_stats.rx_nombuf;
2422 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2423 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2424 stats->q_opackets[j] += slave_stats.q_opackets[j];
2425 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2426 stats->q_obytes[j] += slave_stats.q_obytes[j];
2427 stats->q_errors[j] += slave_stats.q_errors[j];
2436 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2438 struct bond_dev_private *internals = dev->data->dev_private;
2441 for (i = 0; i < internals->slave_count; i++)
2442 rte_eth_stats_reset(internals->slaves[i].port_id);
2446 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2448 struct bond_dev_private *internals = eth_dev->data->dev_private;
2451 internals->promiscuous_en = 1;
2453 switch (internals->mode) {
2454 /* Promiscuous mode is propagated to all slaves */
2455 case BONDING_MODE_ROUND_ROBIN:
2456 case BONDING_MODE_BALANCE:
2457 case BONDING_MODE_BROADCAST:
2458 for (i = 0; i < internals->slave_count; i++)
2459 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2461 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2462 case BONDING_MODE_8023AD:
2464 /* Promiscuous mode is propagated only to primary slave */
2465 case BONDING_MODE_ACTIVE_BACKUP:
2466 case BONDING_MODE_TLB:
2467 case BONDING_MODE_ALB:
2469 rte_eth_promiscuous_enable(internals->current_primary_port);
2474 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2476 struct bond_dev_private *internals = dev->data->dev_private;
2479 internals->promiscuous_en = 0;
2481 switch (internals->mode) {
2482 /* Promiscuous mode is propagated to all slaves */
2483 case BONDING_MODE_ROUND_ROBIN:
2484 case BONDING_MODE_BALANCE:
2485 case BONDING_MODE_BROADCAST:
2486 for (i = 0; i < internals->slave_count; i++)
2487 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2489 /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2490 case BONDING_MODE_8023AD:
2492 /* Promiscuous mode is propagated only to primary slave */
2493 case BONDING_MODE_ACTIVE_BACKUP:
2494 case BONDING_MODE_TLB:
2495 case BONDING_MODE_ALB:
2497 rte_eth_promiscuous_disable(internals->current_primary_port);
2502 bond_ethdev_delayed_lsc_propagation(void *arg)
2507 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2508 RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2512 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2513 void *param, void *ret_param __rte_unused)
2515 struct rte_eth_dev *bonded_eth_dev;
2516 struct bond_dev_private *internals;
2517 struct rte_eth_link link;
2520 int i, valid_slave = 0;
2522 uint8_t lsc_flag = 0;
2524 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2527 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2529 if (check_for_bonded_ethdev(bonded_eth_dev))
2532 internals = bonded_eth_dev->data->dev_private;
2534 /* If the device isn't started don't handle interrupts */
2535 if (!bonded_eth_dev->data->dev_started)
2538 /* verify that port_id is a valid slave of bonded port */
2539 for (i = 0; i < internals->slave_count; i++) {
2540 if (internals->slaves[i].port_id == port_id) {
2549 /* Synchronize lsc callback parallel calls either by real link event
2550 * from the slaves PMDs or by the bonding PMD itself.
2552 rte_spinlock_lock(&internals->lsc_lock);
2554 /* Search for port in active port list */
2555 active_pos = find_slave_by_id(internals->active_slaves,
2556 internals->active_slave_count, port_id);
2558 rte_eth_link_get_nowait(port_id, &link);
2559 if (link.link_status) {
2560 if (active_pos < internals->active_slave_count)
2563 /* if no active slave ports then set this port to be primary port */
2564 if (internals->active_slave_count < 1) {
2565 /* If first active slave, then change link status */
2566 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2567 internals->current_primary_port = port_id;
2570 mac_address_slaves_update(bonded_eth_dev);
2573 activate_slave(bonded_eth_dev, port_id);
2575 /* If user has defined the primary port then default to using it */
2576 if (internals->user_defined_primary_port &&
2577 internals->primary_port == port_id)
2578 bond_ethdev_primary_set(internals, port_id);
2580 if (active_pos == internals->active_slave_count)
2583 /* Remove from active slave list */
2584 deactivate_slave(bonded_eth_dev, port_id);
2586 if (internals->active_slave_count < 1)
2589 /* Update primary id: take the first active slave from the list, or if
2590 * none is available fall back to the configured primary port */
2591 if (port_id == internals->current_primary_port) {
2592 if (internals->active_slave_count > 0)
2593 bond_ethdev_primary_set(internals,
2594 internals->active_slaves[0]);
2596 internals->current_primary_port = internals->primary_port;
2602 * Update bonded device link properties after any change to active
2605 bond_ethdev_link_update(bonded_eth_dev, 0);
2608 /* Cancel any possible outstanding interrupts if delays are enabled */
2609 if (internals->link_up_delay_ms > 0 ||
2610 internals->link_down_delay_ms > 0)
2611 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2614 if (bonded_eth_dev->data->dev_link.link_status) {
2615 if (internals->link_up_delay_ms > 0)
2616 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2617 bond_ethdev_delayed_lsc_propagation,
2618 (void *)bonded_eth_dev);
2620 _rte_eth_dev_callback_process(bonded_eth_dev,
2621 RTE_ETH_EVENT_INTR_LSC,
2625 if (internals->link_down_delay_ms > 0)
2626 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2627 bond_ethdev_delayed_lsc_propagation,
2628 (void *)bonded_eth_dev);
2630 _rte_eth_dev_callback_process(bonded_eth_dev,
2631 RTE_ETH_EVENT_INTR_LSC,
2636 rte_spinlock_unlock(&internals->lsc_lock);
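/* The delay alarms above also coalesce link flapping: a transition that
 * reverses within the configured window cancels the pending callback, so
 * the application only sees stable link-state changes. */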
2642 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2643 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2647 int slave_reta_size;
2648 unsigned reta_count;
2649 struct bond_dev_private *internals = dev->data->dev_private;
2651 if (reta_size != internals->reta_size)
2654 /* Copy RETA table */
2655 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2657 for (i = 0; i < reta_count; i++) {
2658 internals->reta_conf[i].mask = reta_conf[i].mask;
2659 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2660 if ((reta_conf[i].mask >> j) & 0x01)
2661 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2664 /* Fill rest of array */
2665 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2666 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2667 sizeof(internals->reta_conf[0]) * reta_count);
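/* E.g. with reta_size 128 and a group size of 64, the two groups copied
 * above are duplicated across the rest of the internal table, so slaves
 * with a larger RETA still receive a fully populated one. */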
2669 /* Propagate RETA over slaves */
2670 for (i = 0; i < internals->slave_count; i++) {
2671 slave_reta_size = internals->slaves[i].reta_size;
2672 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2673 &internals->reta_conf[0], slave_reta_size);
2682 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2683 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2686 struct bond_dev_private *internals = dev->data->dev_private;
2688 if (reta_size != internals->reta_size)
2691 /* Copy RETA table */
2692 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2693 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2694 if ((reta_conf[i].mask >> j) & 0x01)
2695 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2701 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2702 struct rte_eth_rss_conf *rss_conf)
2705 struct bond_dev_private *internals = dev->data->dev_private;
2706 struct rte_eth_rss_conf bond_rss_conf;
2708 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2710 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2712 if (bond_rss_conf.rss_hf != 0)
2713 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2715 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2716 sizeof(internals->rss_key)) {
2717 if (bond_rss_conf.rss_key_len == 0)
2718 bond_rss_conf.rss_key_len = 40;
2719 internals->rss_key_len = bond_rss_conf.rss_key_len;
2720 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2721 internals->rss_key_len);
2724 for (i = 0; i < internals->slave_count; i++) {
2725 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2735 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2736 struct rte_eth_rss_conf *rss_conf)
2738 struct bond_dev_private *internals = dev->data->dev_private;
2740 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2741 rss_conf->rss_key_len = internals->rss_key_len;
2742 if (rss_conf->rss_key)
2743 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
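/* Note: the memcpy above assumes the caller's rss_key buffer holds at
 * least rss_key_len bytes; the destination size is not checked. */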
2748 const struct eth_dev_ops default_dev_ops = {
2749 .dev_start = bond_ethdev_start,
2750 .dev_stop = bond_ethdev_stop,
2751 .dev_close = bond_ethdev_close,
2752 .dev_configure = bond_ethdev_configure,
2753 .dev_infos_get = bond_ethdev_info,
2754 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2755 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2756 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2757 .rx_queue_release = bond_ethdev_rx_queue_release,
2758 .tx_queue_release = bond_ethdev_tx_queue_release,
2759 .link_update = bond_ethdev_link_update,
2760 .stats_get = bond_ethdev_stats_get,
2761 .stats_reset = bond_ethdev_stats_reset,
2762 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2763 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2764 .reta_update = bond_ethdev_rss_reta_update,
2765 .reta_query = bond_ethdev_rss_reta_query,
2766 .rss_hash_update = bond_ethdev_rss_hash_update,
2767 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
2771 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2773 const char *name = rte_vdev_device_name(dev);
2774 uint8_t socket_id = dev->device.numa_node;
2775 struct bond_dev_private *internals = NULL;
2776 struct rte_eth_dev *eth_dev = NULL;
2777 uint32_t vlan_filter_bmp_size;
2779 /* now do all data allocation - for eth_dev structure, dummy pci driver
2780 * and internal (private) data
2783 /* reserve an ethdev entry */
2784 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2785 if (eth_dev == NULL) {
2786 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2790 internals = eth_dev->data->dev_private;
2791 eth_dev->data->nb_rx_queues = (uint16_t)1;
2792 eth_dev->data->nb_tx_queues = (uint16_t)1;
2794 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2796 if (eth_dev->data->mac_addrs == NULL) {
2797 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2801 eth_dev->dev_ops = &default_dev_ops;
2802 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2804 rte_spinlock_init(&internals->lock);
2805 rte_spinlock_init(&internals->lsc_lock);
2807 internals->port_id = eth_dev->data->port_id;
2808 internals->mode = BONDING_MODE_INVALID;
2809 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2810 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2811 internals->xmit_hash = xmit_l2_hash;
2812 internals->user_defined_mac = 0;
2814 internals->link_status_polling_enabled = 0;
2816 internals->link_status_polling_interval_ms =
2817 DEFAULT_POLLING_INTERVAL_10_MS;
2818 internals->link_down_delay_ms = 0;
2819 internals->link_up_delay_ms = 0;
2821 internals->slave_count = 0;
2822 internals->active_slave_count = 0;
2823 internals->rx_offload_capa = 0;
2824 internals->tx_offload_capa = 0;
2825 internals->candidate_max_rx_pktlen = 0;
2826 internals->max_rx_pktlen = 0;
2828 /* Initially allow to choose any offload type */
2829 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2831 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2832 memset(internals->slaves, 0, sizeof(internals->slaves));
2834 /* Set mode 4 default configuration */
2835 bond_mode_8023ad_setup(eth_dev, NULL);
2836 if (bond_ethdev_mode_set(eth_dev, mode)) {
2837 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2838 eth_dev->data->port_id, mode);
2842 vlan_filter_bmp_size =
2843 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2844 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2845 RTE_CACHE_LINE_SIZE);
2846 if (internals->vlan_filter_bmpmem == NULL) {
2848 "Failed to allocate vlan bitmap for bonded device %u\n",
2849 eth_dev->data->port_id);
2853 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2854 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2855 if (internals->vlan_filter_bmp == NULL) {
2857 "Failed to init vlan bitmap for bonded device %u\n",
2858 eth_dev->data->port_id);
2859 rte_free(internals->vlan_filter_bmpmem);
2863 return eth_dev->data->port_id;
2866 rte_free(internals);
2867 if (eth_dev != NULL) {
2868 rte_free(eth_dev->data->mac_addrs);
2869 rte_eth_dev_release_port(eth_dev);
2875 bond_probe(struct rte_vdev_device *dev)
2878 struct bond_dev_private *internals;
2879 struct rte_kvargs *kvlist;
2880 uint8_t bonding_mode, socket_id/*, agg_mode*/;
2881 int arg_count, port_id;
2887 name = rte_vdev_device_name(dev);
2888 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2890 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2891 pmd_bond_init_valid_arguments);
2895 /* Parse link bonding mode */
2896 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2897 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2898 &bond_ethdev_parse_slave_mode_kvarg,
2899 &bonding_mode) != 0) {
2900 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2905 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2906 "device %s\n", name);
2910 /* Parse socket id to create bonding device on */
2911 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2912 if (arg_count == 1) {
2913 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2914 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2916 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2917 "bonded device %s\n", name);
2920 } else if (arg_count > 1) {
2921 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2922 "bonded device %s\n", name);
2925 socket_id = rte_socket_id();
2928 dev->device.numa_node = socket_id;
2930 /* Create link bonding eth device */
2931 port_id = bond_alloc(dev, bonding_mode);
2933 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2934 "socket %u.\n", name, bonding_mode, socket_id);
2937 internals = rte_eth_devices[port_id].data->dev_private;
2938 internals->kvlist = kvlist;
2941 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2942 if (rte_kvargs_process(kvlist,
2943 PMD_BOND_AGG_MODE_KVARG,
2944 &bond_ethdev_parse_slave_agg_mode_kvarg,
2947 "Failed to parse agg selection mode for bonded device %s\n",
2952 if (internals->mode == BONDING_MODE_8023AD) {
2953 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
2957 "Invalid args for agg selection set "
2958 "for bonded device %s", name);
2963 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2966 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2967 "socket %u.\n", name, port_id, bonding_mode, socket_id);
2971 rte_kvargs_free(kvlist);
2977 bond_remove(struct rte_vdev_device *dev)
2979 struct rte_eth_dev *eth_dev;
2980 struct bond_dev_private *internals;
2986 name = rte_vdev_device_name(dev);
2987 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2989 /* now free all data allocation - for eth_dev structure,
2990 * dummy pci driver and internal (private) data
2993 /* find an ethdev entry */
2994 eth_dev = rte_eth_dev_allocated(name);
2995 if (eth_dev == NULL)
2998 RTE_ASSERT(eth_dev->device == &dev->device);
3000 internals = eth_dev->data->dev_private;
3001 if (internals->slave_count != 0)
3004 if (eth_dev->data->dev_started == 1) {
3005 bond_ethdev_stop(eth_dev);
3006 bond_ethdev_close(eth_dev);
3009 eth_dev->dev_ops = NULL;
3010 eth_dev->rx_pkt_burst = NULL;
3011 eth_dev->tx_pkt_burst = NULL;
3013 internals = eth_dev->data->dev_private;
3014 /* Try to release the mempool used in mode 6. If the bonded
3015 * device is not in mode 6, freeing a NULL pointer is not a problem.
3017 rte_mempool_free(internals->mode6.mempool);
3018 rte_bitmap_free(internals->vlan_filter_bmp);
3019 rte_free(internals->vlan_filter_bmpmem);
3020 rte_free(eth_dev->data->dev_private);
3021 rte_free(eth_dev->data->mac_addrs);
3023 rte_eth_dev_release_port(eth_dev);
3028 /* this part resolves the slave port ids after all the other pdevs and vdevs
3029 * have been allocated */
3031 bond_ethdev_configure(struct rte_eth_dev *dev)
3033 const char *name = dev->device->name;
3034 struct bond_dev_private *internals = dev->data->dev_private;
3035 struct rte_kvargs *kvlist = internals->kvlist;
3037 uint16_t port_id = dev - rte_eth_devices;
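/* The 40-byte key below is the well-known default Toeplitz hash key
 * shipped by Intel NIC drivers. */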
3040 static const uint8_t default_rss_key[40] = {
3041 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3042 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3043 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3044 0xBE, 0xAC, 0x01, 0xFA
3050 * If RSS is enabled, fill the table with default values and
3051 * set the key to the value specified in the port RSS configuration.
3052 * Fall back to the default RSS key if the key is not specified.
3054 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3055 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3056 internals->rss_key_len =
3057 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3058 memcpy(internals->rss_key,
3059 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3060 internals->rss_key_len);
3062 internals->rss_key_len = sizeof(default_rss_key);
3063 memcpy(internals->rss_key, default_rss_key,
3064 internals->rss_key_len);
3067 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3068 internals->reta_conf[i].mask = ~0LL;
3069 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3070 internals->reta_conf[i].reta[j] =
3071 (i * RTE_RETA_GROUP_SIZE + j) %
3072 dev->data->nb_rx_queues;
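/* E.g. with 4 rx queues the default table cycles through 0, 1, 2, 3,
 * spreading the RSS buckets evenly across the configured queues. */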
3076 /* set the max_rx_pktlen */
3077 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3080 * if there is no kvlist, this bonded device was created
3081 * through the bonding API.
3086 /* Parse MAC address for bonded device */
3087 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3088 if (arg_count == 1) {
3089 struct ether_addr bond_mac;
3091 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3092 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3093 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3098 /* Set MAC address */
3099 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3101 "Failed to set mac address on bonded device %s\n",
3105 } else if (arg_count > 1) {
3107 "MAC address can be specified only once for bonded device %s\n",
3112 /* Parse/set balance mode transmit policy */
3113 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3114 if (arg_count == 1) {
3115 uint8_t xmit_policy;
3117 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3118 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3121 "Invalid xmit policy specified for bonded device %s\n",
3126 /* Set balance mode transmit policy */
3127 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3129 "Failed to set balance xmit policy on bonded device %s\n",
3133 } else if (arg_count > 1) {
3135 "Transmit policy can be specified only once for bonded device"
3140 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3141 if (rte_kvargs_process(kvlist,
3142 PMD_BOND_AGG_MODE_KVARG,
3143 &bond_ethdev_parse_slave_agg_mode_kvarg,
3146 "Failed to parse agg selection mode for bonded device %s\n",
3149 if (internals->mode == BONDING_MODE_8023AD)
3150 rte_eth_bond_8023ad_agg_selection_set(port_id,
3154 /* Parse/add slave ports to bonded device */
3155 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3156 struct bond_ethdev_slave_ports slave_ports;
3159 memset(&slave_ports, 0, sizeof(slave_ports));
3161 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3162 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3164 "Failed to parse slave ports for bonded device %s\n",
3169 for (i = 0; i < slave_ports.slave_count; i++) {
3170 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3172 "Failed to add port %d as slave to bonded device %s\n",
3173 slave_ports.slaves[i], name);
3178 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3182 /* Parse/set primary slave port id*/
3183 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3184 if (arg_count == 1) {
3185 uint16_t primary_slave_port_id;
3187 if (rte_kvargs_process(kvlist,
3188 PMD_BOND_PRIMARY_SLAVE_KVARG,
3189 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3190 &primary_slave_port_id) < 0) {
3192 "Invalid primary slave port id specified for bonded device"
3197 /* Set primary slave port id */
3198 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3201 "Failed to set primary slave port %d on bonded device %s\n",
3202 primary_slave_port_id, name);
3205 } else if (arg_count > 1) {
3207 "Primary slave can be specified only once for bonded device"
3212 /* Parse link status monitor polling interval */
3213 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3214 if (arg_count == 1) {
3215 uint32_t lsc_poll_interval_ms;
3217 if (rte_kvargs_process(kvlist,
3218 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3219 &bond_ethdev_parse_time_ms_kvarg,
3220 &lsc_poll_interval_ms) < 0) {
3222 "Invalid lsc polling interval value specified for bonded"
3223 " device %s\n", name);
3227 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3230 "Failed to set lsc monitor polling interval (%u ms) on"
3231 " bonded device %s\n", lsc_poll_interval_ms, name);
3234 } else if (arg_count > 1) {
3236 "LSC polling interval can be specified only once for bonded"
3237 " device %s\n", name);
3241 /* Parse link up interrupt propagation delay */
3242 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3243 if (arg_count == 1) {
3244 uint32_t link_up_delay_ms;
3246 if (rte_kvargs_process(kvlist,
3247 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3248 &bond_ethdev_parse_time_ms_kvarg,
3249 &link_up_delay_ms) < 0) {
3251 "Invalid link up propagation delay value specified for"
3252 " bonded device %s\n", name);
3256 /* Set link up propagation delay */
3257 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3260 "Failed to set link up propagation delay (%u ms) on bonded"
3261 " device %s\n", link_up_delay_ms, name);
3264 } else if (arg_count > 1) {
3266 "Link up propagation delay can be specified only once for"
3267 " bonded device %s\n", name);
3271 /* Parse link down interrupt propagation delay */
3272 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3273 if (arg_count == 1) {
3274 uint32_t link_down_delay_ms;
3276 if (rte_kvargs_process(kvlist,
3277 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3278 &bond_ethdev_parse_time_ms_kvarg,
3279 &link_down_delay_ms) < 0) {
3281 "Invalid link down propagation delay value specified for"
3282 " bonded device %s\n", name);
3286 /* Set link down propagation delay */
3287 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3290 "Failed to set link down propagation delay (%u ms) on"
3291 " bonded device %s\n", link_down_delay_ms, name);
3294 } else if (arg_count > 1) {
3296 "Link down propagation delay can be specified only once for"
3297 " bonded device %s\n", name);
3304 struct rte_vdev_driver pmd_bond_drv = {
3305 .probe = bond_probe,
3306 .remove = bond_remove,
3309 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3310 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3312 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3316 "xmit_policy=[l2 | l23 | l34] "
3317 "agg_mode=[count | stable | bandwidth] "
3320 "lsc_poll_period_ms=<int> "
3322 "down_delay=<int>");