4 * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <netinet/in.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_ethdev_vdev.h>
43 #include <rte_ip_frag.h>
44 #include <rte_devargs.h>
45 #include <rte_kvargs.h>
46 #include <rte_bus_vdev.h>
47 #include <rte_alarm.h>
48 #include <rte_cycles.h>
50 #include "rte_eth_bond.h"
51 #include "rte_eth_bond_private.h"
52 #include "rte_eth_bond_8023ad_private.h"
54 #define REORDER_PERIOD_MS 10
55 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
57 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
59 /* Table for statistics in mode 5 TLB */
60 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
63 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
65 size_t vlan_offset = 0;
67 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
68 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
70 vlan_offset = sizeof(struct vlan_hdr);
71 *proto = vlan_hdr->eth_proto;
73 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
74 vlan_hdr = vlan_hdr + 1;
75 *proto = vlan_hdr->eth_proto;
76 vlan_offset += sizeof(struct vlan_hdr);
83 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
85 struct bond_dev_private *internals;
87 uint16_t num_rx_slave = 0;
88 uint16_t num_rx_total = 0;
92 /* Cast to structure, containing bonded device's port id and queue id */
93 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
95 internals = bd_rx_q->dev_private;
98 for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
99 /* Offset of pointer to *bufs increases as packets are received
100 * from other slaves */
101 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
102 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
104 num_rx_total += num_rx_slave;
105 nb_pkts -= num_rx_slave;
113 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
116 struct bond_dev_private *internals;
118 /* Cast to structure, containing bonded device's port id and queue id */
119 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
121 internals = bd_rx_q->dev_private;
123 return rte_eth_rx_burst(internals->current_primary_port,
124 bd_rx_q->queue_id, bufs, nb_pkts);
127 static inline uint8_t
128 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
130 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
132 return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
133 (ethertype == ether_type_slow_be &&
134 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
137 /*****************************************************************************
138 * Flow director's setup for mode 4 optimization
141 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
142 .dst.addr_bytes = { 0 },
143 .src.addr_bytes = { 0 },
144 .type = RTE_BE16(ETHER_TYPE_SLOW),
147 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
148 .dst.addr_bytes = { 0 },
149 .src.addr_bytes = { 0 },
153 static struct rte_flow_item flow_item_8023ad[] = {
155 .type = RTE_FLOW_ITEM_TYPE_ETH,
156 .spec = &flow_item_eth_type_8023ad,
158 .mask = &flow_item_eth_mask_type_8023ad,
161 .type = RTE_FLOW_ITEM_TYPE_END,
168 const struct rte_flow_attr flow_attr_8023ad = {
177 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
178 uint16_t slave_port) {
179 struct rte_eth_dev_info slave_info;
180 struct rte_flow_error error;
181 struct bond_dev_private *internals = (struct bond_dev_private *)
182 (bond_dev->data->dev_private);
184 const struct rte_flow_action_queue lacp_queue_conf = {
188 const struct rte_flow_action actions[] = {
190 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
191 .conf = &lacp_queue_conf
194 .type = RTE_FLOW_ACTION_TYPE_END,
198 int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
199 flow_item_8023ad, actions, &error);
201 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
202 __func__, error.message, slave_port,
203 internals->mode4.dedicated_queues.rx_qid);
207 rte_eth_dev_info_get(slave_port, &slave_info);
208 if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
209 slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
211 "%s: Slave %d capabilities doesn't allow to allocate additional queues",
212 __func__, slave_port);
220 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
221 struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
222 struct bond_dev_private *internals = (struct bond_dev_private *)
223 (bond_dev->data->dev_private);
224 struct rte_eth_dev_info bond_info;
227 /* Verify if all slaves in bonding supports flow director and */
228 if (internals->slave_count > 0) {
229 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
231 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
232 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
234 for (idx = 0; idx < internals->slave_count; idx++) {
235 if (bond_ethdev_8023ad_flow_verify(bond_dev,
236 internals->slaves[idx].port_id) != 0)
245 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
247 struct rte_flow_error error;
248 struct bond_dev_private *internals = (struct bond_dev_private *)
249 (bond_dev->data->dev_private);
251 struct rte_flow_action_queue lacp_queue_conf = {
252 .index = internals->mode4.dedicated_queues.rx_qid,
255 const struct rte_flow_action actions[] = {
257 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
258 .conf = &lacp_queue_conf
261 .type = RTE_FLOW_ACTION_TYPE_END,
265 internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
266 &flow_attr_8023ad, flow_item_8023ad, actions, &error);
267 if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
268 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
269 "(slave_port=%d queue_id=%d)",
270 error.message, slave_port,
271 internals->mode4.dedicated_queues.rx_qid);
279 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
282 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
283 struct bond_dev_private *internals = bd_rx_q->dev_private;
284 uint16_t num_rx_total = 0; /* Total number of received packets */
285 uint16_t slaves[RTE_MAX_ETHPORTS];
286 uint16_t slave_count;
290 /* Copy slave list to protect against slave up/down changes during tx
292 slave_count = internals->active_slave_count;
293 memcpy(slaves, internals->active_slaves,
294 sizeof(internals->active_slaves[0]) * slave_count);
296 for (i = 0, idx = internals->active_slave;
297 i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
298 idx = idx % slave_count;
300 /* Read packets from this slave */
301 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
302 &bufs[num_rx_total], nb_pkts - num_rx_total);
305 internals->active_slave = idx;
311 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
314 struct bond_dev_private *internals;
315 struct bond_tx_queue *bd_tx_q;
317 uint16_t num_of_slaves;
318 uint16_t slaves[RTE_MAX_ETHPORTS];
319 /* positions in slaves, not ID */
320 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
321 uint8_t distributing_count;
323 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
324 uint16_t i, op_slave_idx;
326 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
328 /* Total amount of packets in slave_bufs */
329 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
330 /* Slow packets placed in each slave */
332 if (unlikely(nb_pkts == 0))
335 bd_tx_q = (struct bond_tx_queue *)queue;
336 internals = bd_tx_q->dev_private;
338 /* Copy slave list to protect against slave up/down changes during tx
340 num_of_slaves = internals->active_slave_count;
341 if (num_of_slaves < 1)
344 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
347 distributing_count = 0;
348 for (i = 0; i < num_of_slaves; i++) {
349 struct port *port = &mode_8023ad_ports[slaves[i]];
350 if (ACTOR_STATE(port, DISTRIBUTING))
351 distributing_offsets[distributing_count++] = i;
354 if (likely(distributing_count > 0)) {
355 /* Populate slaves mbuf with the packets which are to be sent */
356 for (i = 0; i < nb_pkts; i++) {
357 /* Select output slave using hash based on xmit policy */
358 op_slave_idx = internals->xmit_hash(bufs[i],
361 /* Populate slave mbuf arrays with mbufs for that slave.
362 * Use only slaves that are currently distributing.
364 uint8_t slave_offset =
365 distributing_offsets[op_slave_idx];
366 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
368 slave_nb_pkts[slave_offset]++;
372 /* Send packet burst on each slave device */
373 for (i = 0; i < num_of_slaves; i++) {
374 if (slave_nb_pkts[i] == 0)
377 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
378 slave_bufs[i], slave_nb_pkts[i]);
380 num_tx_total += num_tx_slave;
381 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
383 /* If tx burst fails move packets to end of bufs */
384 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
385 uint16_t j = nb_pkts - num_tx_fail_total;
386 for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
388 bufs[j] = slave_bufs[i][num_tx_slave];
397 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
400 /* Cast to structure, containing bonded device's port id and queue id */
401 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
402 struct bond_dev_private *internals = bd_rx_q->dev_private;
403 struct ether_addr bond_mac;
405 struct ether_hdr *hdr;
407 const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
408 uint16_t num_rx_total = 0; /* Total number of received packets */
409 uint16_t slaves[RTE_MAX_ETHPORTS];
410 uint16_t slave_count, idx;
412 uint8_t collecting; /* current slave collecting status */
413 const uint8_t promisc = internals->promiscuous_en;
417 rte_eth_macaddr_get(internals->port_id, &bond_mac);
418 /* Copy slave list to protect against slave up/down changes during tx
420 slave_count = internals->active_slave_count;
421 memcpy(slaves, internals->active_slaves,
422 sizeof(internals->active_slaves[0]) * slave_count);
424 idx = internals->active_slave;
425 if (idx >= slave_count) {
426 internals->active_slave = 0;
429 for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
431 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
434 /* Read packets from this slave */
435 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
436 &bufs[num_rx_total], nb_pkts - num_rx_total);
438 for (k = j; k < 2 && k < num_rx_total; k++)
439 rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
441 /* Handle slow protocol packets. */
442 while (j < num_rx_total) {
444 /* If packet is not pure L2 and is known, skip it */
445 if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
450 if (j + 3 < num_rx_total)
451 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
453 hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
454 subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
456 /* Remove packet from array if it is slow packet or slave is not
457 * in collecting state or bonding interface is not in promiscuous
458 * mode and packet address does not match. */
459 if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
460 !collecting || (!promisc &&
461 !is_multicast_ether_addr(&hdr->d_addr) &&
462 !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
464 if (hdr->ether_type == ether_type_slow_be) {
465 bond_mode_8023ad_handle_slow_pkt(
466 internals, slaves[idx], bufs[j]);
468 rte_pktmbuf_free(bufs[j]);
470 /* Packet is managed by mode 4 or dropped, shift the array */
472 if (j < num_rx_total) {
473 memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
479 if (unlikely(++idx == slave_count))
483 internals->active_slave = idx;
487 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
488 uint32_t burstnumberRX;
489 uint32_t burstnumberTX;
491 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
494 arp_op_name(uint16_t arp_op, char *buf)
498 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
501 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
503 case ARP_OP_REVREQUEST:
504 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
505 "Reverse ARP Request");
507 case ARP_OP_REVREPLY:
508 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
509 "Reverse ARP Reply");
511 case ARP_OP_INVREQUEST:
512 snprintf(buf, sizeof("Peer Identify Request"), "%s",
513 "Peer Identify Request");
515 case ARP_OP_INVREPLY:
516 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
517 "Peer Identify Reply");
522 snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
526 #define MaxIPv4String 16
/* Format a big-endian IPv4 address as dotted decimal into buf. */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}
538 #define MAX_CLIENTS_NUMBER 128
539 uint8_t active_clients;
540 struct client_stats_t {
543 uint32_t ipv4_rx_packets;
544 uint32_t ipv4_tx_packets;
546 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
549 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
553 for (; i < MAX_CLIENTS_NUMBER; i++) {
554 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
555 /* Just update RX packets number for this client */
556 if (TXorRXindicator == &burstnumberRX)
557 client_stats[i].ipv4_rx_packets++;
559 client_stats[i].ipv4_tx_packets++;
563 /* We have a new client. Insert him to the table, and increment stats */
564 if (TXorRXindicator == &burstnumberRX)
565 client_stats[active_clients].ipv4_rx_packets++;
567 client_stats[active_clients].ipv4_tx_packets++;
568 client_stats[active_clients].ipv4_addr = addr;
569 client_stats[active_clients].port = port;
574 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
575 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
576 RTE_LOG(DEBUG, PMD, \
579 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
581 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
587 eth_h->s_addr.addr_bytes[0], \
588 eth_h->s_addr.addr_bytes[1], \
589 eth_h->s_addr.addr_bytes[2], \
590 eth_h->s_addr.addr_bytes[3], \
591 eth_h->s_addr.addr_bytes[4], \
592 eth_h->s_addr.addr_bytes[5], \
594 eth_h->d_addr.addr_bytes[0], \
595 eth_h->d_addr.addr_bytes[1], \
596 eth_h->d_addr.addr_bytes[2], \
597 eth_h->d_addr.addr_bytes[3], \
598 eth_h->d_addr.addr_bytes[4], \
599 eth_h->d_addr.addr_bytes[5], \
606 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
607 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
609 struct ipv4_hdr *ipv4_h;
610 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
611 struct arp_hdr *arp_h;
618 uint16_t ether_type = eth_h->ether_type;
619 uint16_t offset = get_vlan_offset(eth_h, ðer_type);
621 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
622 snprintf(buf, 16, "%s", info);
625 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
626 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
627 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
628 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
629 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
630 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
632 update_client_stats(ipv4_h->src_addr, port, burstnumber);
634 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
635 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
636 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
637 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
638 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
639 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
640 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
647 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
649 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
650 struct bond_dev_private *internals = bd_tx_q->dev_private;
651 struct ether_hdr *eth_h;
652 uint16_t ether_type, offset;
653 uint16_t nb_recv_pkts;
656 nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
658 for (i = 0; i < nb_recv_pkts; i++) {
659 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
660 ether_type = eth_h->ether_type;
661 offset = get_vlan_offset(eth_h, ðer_type);
663 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
664 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
665 mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
667 bond_mode_alb_arp_recv(eth_h, offset, internals);
669 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
670 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
671 mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
679 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
682 struct bond_dev_private *internals;
683 struct bond_tx_queue *bd_tx_q;
685 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
686 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
688 uint16_t num_of_slaves;
689 uint16_t slaves[RTE_MAX_ETHPORTS];
691 uint16_t num_tx_total = 0, num_tx_slave;
693 static int slave_idx = 0;
694 int i, cslave_idx = 0, tx_fail_total = 0;
696 bd_tx_q = (struct bond_tx_queue *)queue;
697 internals = bd_tx_q->dev_private;
699 /* Copy slave list to protect against slave up/down changes during tx
701 num_of_slaves = internals->active_slave_count;
702 memcpy(slaves, internals->active_slaves,
703 sizeof(internals->active_slaves[0]) * num_of_slaves);
705 if (num_of_slaves < 1)
708 /* Populate slaves mbuf with which packets are to be sent on it */
709 for (i = 0; i < nb_pkts; i++) {
710 cslave_idx = (slave_idx + i) % num_of_slaves;
711 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
714 /* increment current slave index so the next call to tx burst starts on the
716 slave_idx = ++cslave_idx;
718 /* Send packet burst on each slave device */
719 for (i = 0; i < num_of_slaves; i++) {
720 if (slave_nb_pkts[i] > 0) {
721 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
722 slave_bufs[i], slave_nb_pkts[i]);
724 /* if tx burst fails move packets to end of bufs */
725 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
726 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
728 tx_fail_total += tx_fail_slave;
730 memcpy(&bufs[nb_pkts - tx_fail_total],
731 &slave_bufs[i][num_tx_slave],
732 tx_fail_slave * sizeof(bufs[0]));
734 num_tx_total += num_tx_slave;
742 bond_ethdev_tx_burst_active_backup(void *queue,
743 struct rte_mbuf **bufs, uint16_t nb_pkts)
745 struct bond_dev_private *internals;
746 struct bond_tx_queue *bd_tx_q;
748 bd_tx_q = (struct bond_tx_queue *)queue;
749 internals = bd_tx_q->dev_private;
751 if (internals->active_slave_count < 1)
754 return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
758 static inline uint16_t
759 ether_hash(struct ether_hdr *eth_hdr)
761 unaligned_uint16_t *word_src_addr =
762 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
763 unaligned_uint16_t *word_dst_addr =
764 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
766 return (word_src_addr[0] ^ word_dst_addr[0]) ^
767 (word_src_addr[1] ^ word_dst_addr[1]) ^
768 (word_src_addr[2] ^ word_dst_addr[2]);
771 static inline uint32_t
772 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
774 return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
777 static inline uint32_t
778 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
780 unaligned_uint32_t *word_src_addr =
781 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
782 unaligned_uint32_t *word_dst_addr =
783 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
785 return (word_src_addr[0] ^ word_dst_addr[0]) ^
786 (word_src_addr[1] ^ word_dst_addr[1]) ^
787 (word_src_addr[2] ^ word_dst_addr[2]) ^
788 (word_src_addr[3] ^ word_dst_addr[3]);
792 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
794 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
796 uint32_t hash = ether_hash(eth_hdr);
798 return (hash ^= hash >> 8) % slave_count;
802 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
804 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
805 uint16_t proto = eth_hdr->ether_type;
806 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
807 uint32_t hash, l3hash = 0;
809 hash = ether_hash(eth_hdr);
811 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
812 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
813 ((char *)(eth_hdr + 1) + vlan_offset);
814 l3hash = ipv4_hash(ipv4_hdr);
816 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
817 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
818 ((char *)(eth_hdr + 1) + vlan_offset);
819 l3hash = ipv6_hash(ipv6_hdr);
822 hash = hash ^ l3hash;
826 return hash % slave_count;
830 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
832 struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
833 uint16_t proto = eth_hdr->ether_type;
834 size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
836 struct udp_hdr *udp_hdr = NULL;
837 struct tcp_hdr *tcp_hdr = NULL;
838 uint32_t hash, l3hash = 0, l4hash = 0;
840 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
841 struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
842 ((char *)(eth_hdr + 1) + vlan_offset);
843 size_t ip_hdr_offset;
845 l3hash = ipv4_hash(ipv4_hdr);
847 /* there is no L4 header in fragmented packet */
848 if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
849 ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
852 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
853 tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
855 l4hash = HASH_L4_PORTS(tcp_hdr);
856 } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
857 udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
859 l4hash = HASH_L4_PORTS(udp_hdr);
862 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
863 struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
864 ((char *)(eth_hdr + 1) + vlan_offset);
865 l3hash = ipv6_hash(ipv6_hdr);
867 if (ipv6_hdr->proto == IPPROTO_TCP) {
868 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
869 l4hash = HASH_L4_PORTS(tcp_hdr);
870 } else if (ipv6_hdr->proto == IPPROTO_UDP) {
871 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
872 l4hash = HASH_L4_PORTS(udp_hdr);
876 hash = l3hash ^ l4hash;
880 return hash % slave_count;
/* Remaining-bandwidth record used to order slaves in mode 5 (TLB). */
struct bwg_slave {
	uint64_t bwg_left_int;       /* integer part of bandwidth headroom */
	uint64_t bwg_left_remainder; /* remainder, used as tie-breaker */
	uint8_t slave;               /* slave port id */
};
890 bond_tlb_activate_slave(struct bond_dev_private *internals) {
893 for (i = 0; i < internals->active_slave_count; i++) {
894 tlb_last_obytets[internals->active_slaves[i]] = 0;
899 bandwidth_cmp(const void *a, const void *b)
901 const struct bwg_slave *bwg_a = a;
902 const struct bwg_slave *bwg_b = b;
903 int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
904 int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
905 (int64_t)bwg_a->bwg_left_remainder;
919 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
920 struct bwg_slave *bwg_slave)
922 struct rte_eth_link link_status;
924 rte_eth_link_get_nowait(port_id, &link_status);
925 uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
928 link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
929 bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
930 bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
934 bond_ethdev_update_tlb_slave_cb(void *arg)
936 struct bond_dev_private *internals = arg;
937 struct rte_eth_stats slave_stats;
938 struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
942 uint8_t update_stats = 0;
945 internals->slave_update_idx++;
948 if (internals->slave_update_idx >= REORDER_PERIOD_MS)
951 for (i = 0; i < internals->active_slave_count; i++) {
952 slave_id = internals->active_slaves[i];
953 rte_eth_stats_get(slave_id, &slave_stats);
954 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
955 bandwidth_left(slave_id, tx_bytes,
956 internals->slave_update_idx, &bwg_array[i]);
957 bwg_array[i].slave = slave_id;
960 tlb_last_obytets[slave_id] = slave_stats.obytes;
964 if (update_stats == 1)
965 internals->slave_update_idx = 0;
968 qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
969 for (i = 0; i < slave_count; i++)
970 internals->tlb_slaves_order[i] = bwg_array[i].slave;
972 rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
973 (struct bond_dev_private *)internals);
977 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
979 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
980 struct bond_dev_private *internals = bd_tx_q->dev_private;
982 struct rte_eth_dev *primary_port =
983 &rte_eth_devices[internals->primary_port];
984 uint16_t num_tx_total = 0;
987 uint16_t num_of_slaves = internals->active_slave_count;
988 uint16_t slaves[RTE_MAX_ETHPORTS];
990 struct ether_hdr *ether_hdr;
991 struct ether_addr primary_slave_addr;
992 struct ether_addr active_slave_addr;
994 if (num_of_slaves < 1)
997 memcpy(slaves, internals->tlb_slaves_order,
998 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1001 ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1004 for (i = 0; i < 3; i++)
1005 rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1008 for (i = 0; i < num_of_slaves; i++) {
1009 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1010 for (j = num_tx_total; j < nb_pkts; j++) {
1011 if (j + 3 < nb_pkts)
1012 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1014 ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1015 if (is_same_ether_addr(ðer_hdr->s_addr, &primary_slave_addr))
1016 ether_addr_copy(&active_slave_addr, ðer_hdr->s_addr);
1017 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1018 mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1022 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1023 bufs + num_tx_total, nb_pkts - num_tx_total);
1025 if (num_tx_total == nb_pkts)
1029 return num_tx_total;
1033 bond_tlb_disable(struct bond_dev_private *internals)
1035 rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
/* Kick off TLB bandwidth tracking; the callback re-arms itself. */
void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}
1045 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1047 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1048 struct bond_dev_private *internals = bd_tx_q->dev_private;
1050 struct ether_hdr *eth_h;
1051 uint16_t ether_type, offset;
1053 struct client_data *client_info;
1056 * We create transmit buffers for every slave and one additional to send
1057 * through tlb. In worst case every packet will be send on one port.
1059 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1060 uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1063 * We create separate transmit buffers for update packets as they won't
1064 * be counted in num_tx_total.
1066 struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1067 uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1069 struct rte_mbuf *upd_pkt;
1072 uint16_t num_send, num_not_send = 0;
1073 uint16_t num_tx_total = 0;
1078 /* Search tx buffer for ARP packets and forward them to alb */
1079 for (i = 0; i < nb_pkts; i++) {
1080 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1081 ether_type = eth_h->ether_type;
1082 offset = get_vlan_offset(eth_h, ðer_type);
1084 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1085 slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1087 /* Change src mac in eth header */
1088 rte_eth_macaddr_get(slave_idx, ð_h->s_addr);
1090 /* Add packet to slave tx buffer */
1091 slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1092 slave_bufs_pkts[slave_idx]++;
1094 /* If packet is not ARP, send it with TLB policy */
1095 slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1097 slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1101 /* Update connected client ARP tables */
1102 if (internals->mode6.ntt) {
1103 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1104 client_info = &internals->mode6.client_table[i];
1106 if (client_info->in_use) {
1107 /* Allocate new packet to send ARP update on current slave */
1108 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1109 if (upd_pkt == NULL) {
1110 RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1113 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1114 + client_info->vlan_count * sizeof(struct vlan_hdr);
1115 upd_pkt->data_len = pkt_size;
1116 upd_pkt->pkt_len = pkt_size;
1118 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1121 /* Add packet to update tx buffer */
1122 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1123 update_bufs_pkts[slave_idx]++;
1126 internals->mode6.ntt = 0;
1129 /* Send ARP packets on proper slaves */
1130 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1131 if (slave_bufs_pkts[i] > 0) {
1132 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1133 slave_bufs[i], slave_bufs_pkts[i]);
1134 for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1135 bufs[nb_pkts - 1 - num_not_send - j] =
1136 slave_bufs[i][nb_pkts - 1 - j];
1139 num_tx_total += num_send;
1140 num_not_send += slave_bufs_pkts[i] - num_send;
1142 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1143 /* Print TX stats including update packets */
1144 for (j = 0; j < slave_bufs_pkts[i]; j++) {
1145 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1146 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1152 /* Send update packets on proper slaves */
1153 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1154 if (update_bufs_pkts[i] > 0) {
1155 num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1156 update_bufs_pkts[i]);
1157 for (j = num_send; j < update_bufs_pkts[i]; j++) {
1158 rte_pktmbuf_free(update_bufs[i][j]);
1160 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1161 for (j = 0; j < update_bufs_pkts[i]; j++) {
1162 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1163 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1169 /* Send non-ARP packets using tlb policy */
1170 if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1171 num_send = bond_ethdev_tx_burst_tlb(queue,
1172 slave_bufs[RTE_MAX_ETHPORTS],
1173 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1175 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1176 bufs[nb_pkts - 1 - num_not_send - j] =
1177 slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1180 num_tx_total += num_send;
1183 return num_tx_total;
/*
 * TX burst for BONDING_MODE_BALANCE: spread the burst across the active
 * slaves using the configured transmit hash policy, then flush one
 * sub-burst per slave. Packets a slave fails to send are compacted to
 * the tail of bufs[] so the caller can retry them; returns the number
 * of packets actually transmitted.
 * NOTE(review): this extract is elided (braces/lines missing); comments
 * describe only the visible statements.
 */
1187 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1190 struct bond_dev_private *internals;
1191 struct bond_tx_queue *bd_tx_q;
1193 uint16_t num_of_slaves;
1194 uint16_t slaves[RTE_MAX_ETHPORTS];
1196 uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
/* One VLA bucket of mbuf pointers per possible slave port */
1200 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
1201 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1203 bd_tx_q = (struct bond_tx_queue *)queue;
1204 internals = bd_tx_q->dev_private;
1206 /* Copy slave list to protect against slave up/down changes during tx
1208 num_of_slaves = internals->active_slave_count;
1209 memcpy(slaves, internals->active_slaves,
1210 sizeof(internals->active_slaves[0]) * num_of_slaves);
1212 if (num_of_slaves < 1)
1213 return num_tx_total;
1215 /* Populate slaves mbuf with the packets which are to be sent on it */
1216 for (i = 0; i < nb_pkts; i++) {
1217 /* Select output slave using hash based on xmit policy */
1218 op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
1220 /* Populate slave mbuf arrays with mbufs for that slave */
1221 slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
1224 /* Send packet burst on each slave device */
1225 for (i = 0; i < num_of_slaves; i++) {
1226 if (slave_nb_pkts[i] > 0) {
1227 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1228 slave_bufs[i], slave_nb_pkts[i]);
1230 /* if tx burst fails move packets to end of bufs */
1231 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1232 int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
/* Unsent mbufs are copied back so caller retains ownership */
1234 tx_fail_total += slave_tx_fail_count;
1235 memcpy(&bufs[nb_pkts - tx_fail_total],
1236 &slave_bufs[i][num_tx_slave],
1237 slave_tx_fail_count * sizeof(bufs[0]));
1240 num_tx_total += num_tx_slave;
1244 return num_tx_total;
/*
 * TX burst for BONDING_MODE_8023AD (LACP, non-dedicated-queue path).
 * Per slave: first drain that slave's LACPDU "slow" ring into the front
 * of its buffer, then hash data packets only onto slaves whose actor
 * state is DISTRIBUTING. Slow packets that fail to send are freed here
 * (they are PMD-owned); failed data packets are moved to the tail of
 * bufs[] for the caller. Returns count of DATA packets sent (slow
 * packets are excluded from the return value).
 * NOTE(review): elided extract - some braces/continue lines are missing.
 */
1248 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1251 struct bond_dev_private *internals;
1252 struct bond_tx_queue *bd_tx_q;
1254 uint16_t num_of_slaves;
1255 uint16_t slaves[RTE_MAX_ETHPORTS];
1256 /* positions in slaves, not ID */
1257 uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1258 uint8_t distributing_count;
1260 uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1261 uint16_t i, j, op_slave_idx;
/* Room for data packets plus any dequeued LACPDUs per slave */
1262 const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
1264 /* Allocate additional packets in case 8023AD mode. */
1265 struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
1266 void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
1268 /* Total amount of packets in slave_bufs */
1269 uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1270 /* Slow packets placed in each slave */
1271 uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1273 bd_tx_q = (struct bond_tx_queue *)queue;
1274 internals = bd_tx_q->dev_private;
1276 /* Copy slave list to protect against slave up/down changes during tx
1278 num_of_slaves = internals->active_slave_count;
1279 if (num_of_slaves < 1)
1280 return num_tx_total;
1282 memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1284 distributing_count = 0;
1285 for (i = 0; i < num_of_slaves; i++) {
1286 struct port *port = &mode_8023ad_ports[slaves[i]];
/* Slow (LACP control) packets go first in each slave's buffer */
1288 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1289 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1291 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1293 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1294 slave_bufs[i][j] = slow_pkts[j];
1296 if (ACTOR_STATE(port, DISTRIBUTING))
1297 distributing_offsets[distributing_count++] = i;
/* Data packets only when at least one slave is distributing */
1300 if (likely(distributing_count > 0)) {
1301 /* Populate slaves mbuf with the packets which are to be sent on it */
1302 for (i = 0; i < nb_pkts; i++) {
1303 /* Select output slave using hash based on xmit policy */
1304 op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1306 /* Populate slave mbuf arrays with mbufs for that slave. Use only
1307 * slaves that are currently distributing. */
1308 uint8_t slave_offset = distributing_offsets[op_slave_idx];
1309 slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1310 slave_nb_pkts[slave_offset]++;
1314 /* Send packet burst on each slave device */
1315 for (i = 0; i < num_of_slaves; i++) {
1316 if (slave_nb_pkts[i] == 0)
1319 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1320 slave_bufs[i], slave_nb_pkts[i]);
1322 /* If tx burst fails drop slow packets */
1323 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1324 rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
/* Subtract slow packets: return value counts data packets only */
1326 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1327 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1329 /* If tx burst fails move packets to end of bufs */
1330 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1331 uint16_t j = nb_pkts - num_tx_fail_total;
1332 for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1333 bufs[j] = slave_bufs[i][num_tx_slave];
1337 return num_tx_total;
/*
 * TX burst for BONDING_MODE_BROADCAST: bump each mbuf's refcnt by
 * (num_of_slaves - 1) and transmit the full burst on every active
 * slave. On partial failure, frees the extra references for every
 * slave except the most successful one, so the caller's view matches
 * the returned count (max packets sent by any single slave).
 */
1341 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1344 struct bond_dev_private *internals;
1345 struct bond_tx_queue *bd_tx_q;
1347 uint8_t tx_failed_flag = 0, num_of_slaves;
1348 uint16_t slaves[RTE_MAX_ETHPORTS];
1350 uint16_t max_nb_of_tx_pkts = 0;
1352 int slave_tx_total[RTE_MAX_ETHPORTS];
1353 int i, most_successful_tx_slave = -1;
1355 bd_tx_q = (struct bond_tx_queue *)queue;
1356 internals = bd_tx_q->dev_private;
1358 /* Copy slave list to protect against slave up/down changes during tx
1360 num_of_slaves = internals->active_slave_count;
1361 memcpy(slaves, internals->active_slaves,
1362 sizeof(internals->active_slaves[0]) * num_of_slaves);
1364 if (num_of_slaves < 1)
1367 /* Increment reference count on mbufs */
1368 for (i = 0; i < nb_pkts; i++)
1369 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1371 /* Transmit burst on each active slave */
1372 for (i = 0; i < num_of_slaves; i++) {
1373 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
/* Elided: tx_failed_flag is presumably set here on shortfall */
1376 if (unlikely(slave_tx_total[i] < nb_pkts))
1379 /* record the value and slave index for the slave which transmits the
1380 * maximum number of packets */
1381 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1382 max_nb_of_tx_pkts = slave_tx_total[i];
1383 most_successful_tx_slave = i;
1387 /* if slaves fail to transmit packets from burst, the calling application
1388 * is not expected to know about multiple references to packets so we must
1389 * handle failures of all packets except those of the most successful slave
1391 if (unlikely(tx_failed_flag))
1392 for (i = 0; i < num_of_slaves; i++)
1393 if (i != most_successful_tx_slave)
1394 while (slave_tx_total[i] < nb_pkts)
1395 rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1397 return max_nb_of_tx_pkts;
/*
 * Record the link properties the bond should advertise. In mode 4
 * (802.3ad) the first slave's autoneg/duplex/speed are saved as the
 * reference all later slaves must match; in every other mode the
 * bonded device just reports AUTONEG / FULL_DUPLEX defaults.
 */
1401 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1403 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1405 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1407 * If in mode 4 then save the link properties of the first
1408 * slave, all subsequent slaves must match these properties
1410 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1412 bond_link->link_autoneg = slave_link->link_autoneg;
1413 bond_link->link_duplex = slave_link->link_duplex;
1414 bond_link->link_speed = slave_link->link_speed;
1417 * In any other mode the link properties are set to default
1418 * values of AUTONEG/DUPLEX
1420 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1421 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
/*
 * Check whether a slave's link properties are acceptable for the bond.
 * Only mode 4 constrains them: the slave must match the duplex, autoneg
 * and speed recorded from the first slave (see link_properties_set).
 * Return value on mismatch is elided in this extract (non-zero upstream).
 */
1426 link_properties_valid(struct rte_eth_dev *ethdev,
1427 struct rte_eth_link *slave_link)
1429 struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1431 if (bond_ctx->mode == BONDING_MODE_8023AD) {
1432 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1434 if (bond_link->link_duplex != slave_link->link_duplex ||
1435 bond_link->link_autoneg != slave_link->link_autoneg ||
1436 bond_link->link_speed != slave_link->link_speed)
/*
 * Copy eth_dev's primary MAC address into dst_mac_addr.
 * Logs an error and bails (return elided here) on NULL arguments.
 */
1444 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1446 struct ether_addr *mac_addr;
1448 if (eth_dev == NULL) {
1449 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1453 if (dst_mac_addr == NULL) {
1454 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1458 mac_addr = eth_dev->data->mac_addrs;
1460 ether_addr_copy(mac_addr, dst_mac_addr);
/*
 * Overwrite eth_dev's primary MAC address with new_mac_addr (only if
 * it actually differs, to avoid a redundant write). NULL arguments
 * are logged as errors; error-return lines are elided in this extract.
 */
1465 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1467 struct ether_addr *mac_addr;
1469 if (eth_dev == NULL) {
1470 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1474 if (new_mac_addr == NULL) {
1475 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1479 mac_addr = eth_dev->data->mac_addrs;
1481 /* If new MAC is different to current MAC then update */
1482 if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1483 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
/*
 * Propagate the bonded device's MAC policy to all slaves, per mode:
 *  - RR / BALANCE / BROADCAST: every slave gets the bond's MAC.
 *  - 8023AD: delegated to bond_mode_8023ad_mac_address_update().
 *  - ACTIVE_BACKUP / TLB / ALB (default): only the current primary gets
 *    the bond's MAC; all other slaves are restored to their own
 *    persisted MAC.
 */
1489 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1491 struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1494 /* Update slave devices MAC addresses */
1495 if (internals->slave_count < 1)
1498 switch (internals->mode) {
1499 case BONDING_MODE_ROUND_ROBIN:
1500 case BONDING_MODE_BALANCE:
1501 case BONDING_MODE_BROADCAST:
1502 for (i = 0; i < internals->slave_count; i++) {
1503 if (rte_eth_dev_default_mac_addr_set(
1504 internals->slaves[i].port_id,
1505 bonded_eth_dev->data->mac_addrs)) {
1506 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1507 internals->slaves[i].port_id);
1512 case BONDING_MODE_8023AD:
1513 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1515 case BONDING_MODE_ACTIVE_BACKUP:
1516 case BONDING_MODE_TLB:
1517 case BONDING_MODE_ALB:
1519 for (i = 0; i < internals->slave_count; i++) {
1520 if (internals->slaves[i].port_id ==
1521 internals->current_primary_port) {
/* NOTE(review): the check above matches current_primary_port but the
 * set below targets internals->primary_port; if the user-defined
 * primary went down these can differ, so this looks like it should be
 * current_primary_port (fixed upstream in DPDK) - confirm against the
 * full source. */
1522 if (rte_eth_dev_default_mac_addr_set(
1523 internals->primary_port,
1524 bonded_eth_dev->data->mac_addrs)) {
1525 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1526 internals->current_primary_port);
/* Non-primary slaves get their original (persisted) MAC back */
1530 if (rte_eth_dev_default_mac_addr_set(
1531 internals->slaves[i].port_id,
1532 &internals->slaves[i].persisted_mac_addr)) {
1533 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1534 internals->slaves[i].port_id);
/*
 * Select the rx/tx burst handlers for the requested bonding mode and
 * store the mode in internals. Mode 4 and ALB additionally run their
 * enable hooks first (failure paths elided here); mode 4 picks the
 * fast-queue handlers when dedicated queues are enabled.
 */
1545 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1547 struct bond_dev_private *internals;
1549 internals = eth_dev->data->dev_private;
1552 case BONDING_MODE_ROUND_ROBIN:
1553 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1554 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1556 case BONDING_MODE_ACTIVE_BACKUP:
1557 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1558 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1560 case BONDING_MODE_BALANCE:
1561 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1562 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1564 case BONDING_MODE_BROADCAST:
1565 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1566 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1568 case BONDING_MODE_8023AD:
1569 if (bond_mode_8023ad_enable(eth_dev) != 0)
1572 if (internals->mode4.dedicated_queues.enabled == 0) {
1573 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1574 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
/* Without dedicated queues LACPDUs ride the data path, so the app
 * must poll often enough to keep the LACP state machines alive */
1575 RTE_LOG(WARNING, PMD,
1576 "Using mode 4, it is necessary to do TX burst "
1577 "and RX burst at least every 100ms.\n");
1579 /* Use flow director's optimization */
1580 eth_dev->rx_pkt_burst =
1581 bond_ethdev_rx_burst_8023ad_fast_queue;
1582 eth_dev->tx_pkt_burst =
1583 bond_ethdev_tx_burst_8023ad_fast_queue;
1586 case BONDING_MODE_TLB:
1587 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1588 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1590 case BONDING_MODE_ALB:
1591 if (bond_mode_alb_enable(eth_dev) != 0)
1594 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1595 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1601 internals->mode = mode;
/*
 * Mode-4 dedicated-queue setup for one slave: lazily create the slave's
 * slow-packet mempool, then (if dedicated queues are enabled) set up
 * the extra rx/tx queues reserved for LACP control traffic.
 * Panics on mempool allocation failure because the pool cannot be
 * recreated later. Error-return lines are elided in this extract.
 */
1608 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1609 struct rte_eth_dev *slave_eth_dev)
1612 struct bond_dev_private *internals = (struct bond_dev_private *)
1613 bonded_eth_dev->data->dev_private;
1614 struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1616 if (port->slow_pool == NULL) {
1618 int slave_id = slave_eth_dev->data->port_id;
/* Pool name must be unique per slave port */
1620 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1622 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1623 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1624 slave_eth_dev->data->numa_node);
1626 /* Any memory allocation failure in initialization is critical because
1627 * resources can't be free, so reinitialization is impossible. */
1628 if (port->slow_pool == NULL) {
1629 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1630 slave_id, mem_name, rte_strerror(rte_errno));
1634 if (internals->mode4.dedicated_queues.enabled == 1) {
1635 /* Configure slow Rx queue */
1637 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1638 internals->mode4.dedicated_queues.rx_qid, 128,
1639 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1640 NULL, port->slow_pool);
1643 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1644 slave_eth_dev->data->port_id,
1645 internals->mode4.dedicated_queues.rx_qid,
/* Slow Tx queue for transmitting LACPDUs */
1650 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1651 internals->mode4.dedicated_queues.tx_qid, 512,
1652 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1656 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1657 slave_eth_dev->data->port_id,
1658 internals->mode4.dedicated_queues.tx_qid,
/*
 * (Re)configure one slave to mirror the bonded device: stop it, copy
 * LSC/RSS/VLAN settings, rte_eth_dev_configure with the bond's queue
 * counts (plus the dedicated mode-4 queues when enabled), mirror every
 * rx/tx queue setup, install the mode-4 slow-queue flow rule if needed,
 * start the slave, sync RETA, and fire an initial LSC callback.
 * Error-return lines are elided throughout this extract.
 */
1667 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1668 struct rte_eth_dev *slave_eth_dev)
1670 struct bond_rx_queue *bd_rx_q;
1671 struct bond_tx_queue *bd_tx_q;
1672 uint16_t nb_rx_queues;
1673 uint16_t nb_tx_queues;
1677 struct rte_flow_error flow_error;
1679 struct bond_dev_private *internals = (struct bond_dev_private *)
1680 bonded_eth_dev->data->dev_private;
/* Slave must be stopped before it can be reconfigured */
1683 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1685 /* Enable interrupts on slave device if supported */
1686 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1687 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1689 /* If RSS is enabled for bonding, try to enable it for slaves */
1690 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1691 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1693 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1694 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1695 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1696 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
/* No explicit key on the bond: let the slave use its default key */
1698 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1701 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1702 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1703 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1704 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
/* Mirror the bond's VLAN filter setting */
1707 slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1708 bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1710 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1711 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
/* Elided: with dedicated mode-4 queues enabled, one extra rx and tx
 * queue is presumably added here for slow traffic - confirm upstream */
1713 if (internals->mode == BONDING_MODE_8023AD) {
1714 if (internals->mode4.dedicated_queues.enabled == 1) {
1720 /* Configure device */
1721 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1722 nb_rx_queues, nb_tx_queues,
1723 &(slave_eth_dev->data->dev_conf));
1725 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1726 slave_eth_dev->data->port_id, errval);
1730 /* Setup Rx Queues */
1731 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1732 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1734 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1735 bd_rx_q->nb_rx_desc,
1736 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1737 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1740 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1741 slave_eth_dev->data->port_id, q_id, errval);
1746 /* Setup Tx Queues */
1747 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1748 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1750 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1751 bd_tx_q->nb_tx_desc,
1752 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1756 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1757 slave_eth_dev->data->port_id, q_id, errval);
/* Mode-4 dedicated queues: slow queues plus the flow rule steering
 * LACP traffic to them */
1762 if (internals->mode == BONDING_MODE_8023AD &&
1763 internals->mode4.dedicated_queues.enabled == 1) {
1764 if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev)
1768 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1769 slave_eth_dev->data->port_id) != 0) {
1771 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772 slave_eth_dev->data->port_id, q_id, errval);
/* Replace any stale flow rule before installing a fresh one */
1776 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1777 rte_flow_destroy(slave_eth_dev->data->port_id,
1778 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1781 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1782 slave_eth_dev->data->port_id);
1786 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1788 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1789 slave_eth_dev->data->port_id, errval);
1793 /* If RSS is enabled for bonding, synchronize RETA */
1794 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1796 struct bond_dev_private *internals;
1798 internals = bonded_eth_dev->data->dev_private;
1800 for (i = 0; i < internals->slave_count; i++) {
1801 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1802 errval = rte_eth_dev_rss_reta_update(
1803 slave_eth_dev->data->port_id,
1804 &internals->reta_conf[0],
1805 internals->slaves[i].reta_size);
/* RETA sync failure is non-fatal: warn and carry on */
1807 RTE_LOG(WARNING, PMD,
1808 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1809 " RSS Configuration for bonding may be inconsistent.\n",
1810 slave_eth_dev->data->port_id, errval);
1817 /* If lsc interrupt is set, check initial slave's link status */
1818 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1820 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1821 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
/*
 * Remove slave_eth_dev from internals->slaves[]: find its index,
 * close the gap with memmove, decrement the count, and reset the
 * slave's ethdev so it must be fully reconfigured before reuse.
 */
1829 slave_remove(struct bond_dev_private *internals,
1830 struct rte_eth_dev *slave_eth_dev)
1834 for (i = 0; i < internals->slave_count; i++)
1835 if (internals->slaves[i].port_id ==
1836 slave_eth_dev->data->port_id)
/* Shift the remaining entries down over the removed slot */
1839 if (i < (internals->slave_count - 1))
1840 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1841 sizeof(internals->slaves[0]) *
1842 (internals->slave_count - i - 1));
1844 internals->slave_count--;
1846 /* force reconfiguration of slave interfaces */
1847 _rte_eth_dev_reset(slave_eth_dev);
1851 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
/*
 * Fill in the next free bond_slave_details slot for slave_eth_dev:
 * record its port id, enable status polling if the device lacks LSC
 * interrupt support, and persist its original MAC so it can be
 * restored when the slave is removed or demoted from primary.
 */
1854 slave_add(struct bond_dev_private *internals,
1855 struct rte_eth_dev *slave_eth_dev)
1857 struct bond_slave_details *slave_details =
1858 &internals->slaves[internals->slave_count];
1860 slave_details->port_id = slave_eth_dev->data->port_id;
1861 slave_details->last_link_status = 0;
1863 /* Mark slave devices that don't support interrupts so we can
1864 * compensate when we start the bond
1866 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1867 slave_details->link_status_poll_enabled = 1;
1870 slave_details->link_status_wait_to_complete = 0;
1871 /* clean tlb_last_obytes when adding port for bonding device */
1872 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1873 sizeof(struct ether_addr));
/*
 * Select slave_port_id as the current primary. With no active slaves
 * the choice is recorded directly; otherwise it takes effect only if
 * the proposed port is found in the active-slave list.
 * NOTE(review): braces/else lines are elided here, so the exact
 * control flow of the two assignments cannot be confirmed from this
 * extract.
 */
1877 bond_ethdev_primary_set(struct bond_dev_private *internals,
1878 uint16_t slave_port_id)
1882 if (internals->active_slave_count < 1)
1883 internals->current_primary_port = slave_port_id;
1885 /* Search bonded device slave ports for new proposed primary port */
1886 for (i = 0; i < internals->active_slave_count; i++) {
1887 if (internals->active_slaves[i] == slave_port_id)
1888 internals->current_primary_port = slave_port_id;
1893 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
/*
 * dev_start op for the bonded device: reject slave ports, require at
 * least one slave, adopt the primary slave's persisted MAC when the
 * user set none, push MACs/promiscuous state to slaves, reserve the
 * mode-4 dedicated queue ids, reconfigure and start every slave, kick
 * off link-status polling when any slave lacks LSC interrupts, apply
 * the user-defined primary, and start mode-specific machinery
 * (802.3ad state machines, TLB/ALB callbacks).
 * Error-return lines are elided in this extract.
 */
1896 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1898 struct bond_dev_private *internals;
1901 /* slave eth dev will be started by bonded device */
1902 if (check_for_bonded_ethdev(eth_dev)) {
1903 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1904 eth_dev->data->port_id);
1908 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1909 eth_dev->data->dev_started = 1;
1911 internals = eth_dev->data->dev_private;
1913 if (internals->slave_count == 0) {
1914 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
/* No user MAC: inherit the primary slave's original MAC */
1918 if (internals->user_defined_mac == 0) {
1919 struct ether_addr *new_mac_addr = NULL;
1921 for (i = 0; i < internals->slave_count; i++)
1922 if (internals->slaves[i].port_id == internals->primary_port)
1923 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1925 if (new_mac_addr == NULL)
1928 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1929 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1930 eth_dev->data->port_id);
1935 /* Update all slave devices MACs*/
1936 if (mac_address_slaves_update(eth_dev) != 0)
1939 /* If bonded device is configure in promiscuous mode then re-apply config */
1940 if (internals->promiscuous_en)
1941 bond_ethdev_promiscuous_enable(eth_dev);
/* Dedicated mode-4 queues live just past the data-path queues */
1943 if (internals->mode == BONDING_MODE_8023AD) {
1944 if (internals->mode4.dedicated_queues.enabled == 1) {
1945 internals->mode4.dedicated_queues.rx_qid =
1946 eth_dev->data->nb_rx_queues;
1947 internals->mode4.dedicated_queues.tx_qid =
1948 eth_dev->data->nb_tx_queues;
1953 /* Reconfigure each slave device if starting bonded device */
1954 for (i = 0; i < internals->slave_count; i++) {
1955 struct rte_eth_dev *slave_ethdev =
1956 &(rte_eth_devices[internals->slaves[i].port_id]);
1957 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1959 "bonded port (%d) failed to reconfigure slave device (%d)",
1960 eth_dev->data->port_id,
1961 internals->slaves[i].port_id);
1964 /* We will need to poll for link status if any slave doesn't
1965 * support interrupts
1967 if (internals->slaves[i].link_status_poll_enabled)
1968 internals->link_status_polling_enabled = 1;
1971 /* start polling if needed */
1972 if (internals->link_status_polling_enabled) {
1974 internals->link_status_polling_interval_ms * 1000,
1975 bond_ethdev_slave_link_status_change_monitor,
1976 (void *)&rte_eth_devices[internals->port_id]);
1979 if (internals->user_defined_primary_port)
1980 bond_ethdev_primary_set(internals, internals->primary_port);
1982 if (internals->mode == BONDING_MODE_8023AD)
1983 bond_mode_8023ad_start(eth_dev);
1985 if (internals->mode == BONDING_MODE_TLB ||
1986 internals->mode == BONDING_MODE_ALB)
1987 bond_tlb_enable(internals);
/* Elided error path: roll back the started flag */
1992 eth_dev->data->dev_started = 0;
/*
 * Free every bond rx/tx queue structure allocated by the queue_setup
 * ops, NULL the slots, and zero the queue counts.
 */
1997 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2001 if (dev->data->rx_queues != NULL) {
2002 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2003 rte_free(dev->data->rx_queues[i]);
2004 dev->data->rx_queues[i] = NULL;
2006 dev->data->nb_rx_queues = 0;
2009 if (dev->data->tx_queues != NULL) {
2010 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2011 rte_free(dev->data->tx_queues[i]);
2012 dev->data->tx_queues[i] = NULL;
2014 dev->data->nb_tx_queues = 0;
/*
 * dev_stop op: tear down mode-specific state (mode 4: stop the LACP
 * machinery and drain/free every queued control packet; TLB/ALB:
 * disable the callback and clear per-slave byte counters), then clear
 * the active-slave list, polling flag, cached link statuses, and mark
 * the device link-down / stopped.
 */
2019 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2021 struct bond_dev_private *internals = eth_dev->data->dev_private;
2024 if (internals->mode == BONDING_MODE_8023AD) {
2028 bond_mode_8023ad_stop(eth_dev);
2030 /* Discard all messages to/from mode 4 state machines */
2031 for (i = 0; i < internals->active_slave_count; i++) {
2032 port = &mode_8023ad_ports[internals->active_slaves[i]];
2034 RTE_ASSERT(port->rx_ring != NULL);
2035 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2036 rte_pktmbuf_free(pkt);
2038 RTE_ASSERT(port->tx_ring != NULL);
2039 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2040 rte_pktmbuf_free(pkt);
2044 if (internals->mode == BONDING_MODE_TLB ||
2045 internals->mode == BONDING_MODE_ALB) {
2046 bond_tlb_disable(internals);
2047 for (i = 0; i < internals->active_slave_count; i++)
2048 tlb_last_obytets[internals->active_slaves[i]] = 0;
2051 internals->active_slave_count = 0;
2052 internals->link_status_polling_enabled = 0;
2053 for (i = 0; i < internals->slave_count; i++)
2054 internals->slaves[i].last_link_status = 0;
2056 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2057 eth_dev->data->dev_started = 0;
/*
 * dev_close op: stop and detach every slave (slaves whose removal
 * fails are skipped rather than retried forever), free the bond's
 * queue structures, and reset the VLAN filter bitmap.
 */
2061 bond_ethdev_close(struct rte_eth_dev *dev)
2063 struct bond_dev_private *internals = dev->data->dev_private;
2064 uint8_t bond_port_id = internals->port_id;
2067 RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
/* slave_remove shrinks slave_count; 'skipped' advances only past
 * slaves that failed to detach, so the loop terminates either way */
2068 while (internals->slave_count != skipped) {
2069 uint16_t port_id = internals->slaves[skipped].port_id;
2071 rte_eth_dev_stop(port_id);
2073 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2075 "Failed to remove port %d from bonded device "
2076 "%s\n", port_id, dev->device->name);
2080 bond_ethdev_free_queues(dev);
2081 rte_bitmap_reset(internals->vlan_filter_bmp);
2084 /* forward declaration */
2085 static int bond_ethdev_configure(struct rte_eth_dev *dev);
/*
 * dev_infos_get op: advertise capabilities as the intersection of all
 * slaves - max rx/tx queue counts are the minimum across slaves
 * (every slave must support the bond's queue count), minus one each
 * when mode-4 dedicated queues are enabled. Offload/RSS capabilities
 * come from the aggregated values cached in internals.
 */
2088 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2090 struct bond_dev_private *internals = dev->data->dev_private;
2092 uint16_t max_nb_rx_queues = UINT16_MAX;
2093 uint16_t max_nb_tx_queues = UINT16_MAX;
2095 dev_info->max_mac_addrs = 1;
2097 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2098 internals->candidate_max_rx_pktlen :
2099 ETHER_MAX_JUMBO_FRAME_LEN;
2101 /* Max number of tx/rx queues that the bonded device can support is the
2102 * minimum values of the bonded slaves, as all slaves must be capable
2103 * of supporting the same number of tx/rx queues.
2105 if (internals->slave_count > 0) {
2106 struct rte_eth_dev_info slave_info;
2109 for (idx = 0; idx < internals->slave_count; idx++) {
2110 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2113 if (slave_info.max_rx_queues < max_nb_rx_queues)
2114 max_nb_rx_queues = slave_info.max_rx_queues;
2116 if (slave_info.max_tx_queues < max_nb_tx_queues)
2117 max_nb_tx_queues = slave_info.max_tx_queues;
2121 dev_info->max_rx_queues = max_nb_rx_queues;
2122 dev_info->max_tx_queues = max_nb_tx_queues;
2125 * If dedicated hw queues enabled for link bonding device in LACP mode
2126 * then we need to reduce the maximum number of data path queues by 1.
2128 if (internals->mode == BONDING_MODE_8023AD &&
2129 internals->mode4.dedicated_queues.enabled == 1) {
2130 dev_info->max_rx_queues--;
2131 dev_info->max_tx_queues--;
2134 dev_info->min_rx_bufsize = 0;
2136 dev_info->rx_offload_capa = internals->rx_offload_capa;
2137 dev_info->tx_offload_capa = internals->tx_offload_capa;
2138 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2140 dev_info->reta_size = internals->reta_size;
/*
 * vlan_filter_set op: record the VLAN id in the bond's bitmap (so it
 * can be replayed onto future slaves) and apply the filter to every
 * current slave. The internals lock guards against a concurrent
 * slave add. Unsupported slaves only produce a warning.
 */
2144 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2148 struct bond_dev_private *internals = dev->data->dev_private;
2150 /* don't do this while a slave is being added */
2151 rte_spinlock_lock(&internals->lock);
2154 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2156 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2158 for (i = 0; i < internals->slave_count; i++) {
2159 uint16_t port_id = internals->slaves[i].port_id;
2161 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2163 RTE_LOG(WARNING, PMD,
2164 "Setting VLAN filter on slave port %u not supported.\n",
2168 rte_spinlock_unlock(&internals->lock);
/*
 * rx_queue_setup op: allocate a bond_rx_queue on the device's NUMA
 * node, capture the config (desc count, rxconf, mempool) that will be
 * replayed onto each slave in slave_configure(), and register it in
 * dev->data->rx_queues. Allocation-failure return is elided here.
 */
2173 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2174 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2175 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2177 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2178 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2179 0, dev->data->numa_node);
2180 if (bd_rx_q == NULL)
2183 bd_rx_q->queue_id = rx_queue_id;
2184 bd_rx_q->dev_private = dev->data->dev_private;
2186 bd_rx_q->nb_rx_desc = nb_rx_desc;
2188 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2189 bd_rx_q->mb_pool = mb_pool;
2191 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
/*
 * tx_queue_setup op: mirror of bond_ethdev_rx_queue_setup for the TX
 * side - allocate a bond_tx_queue, store desc count and txconf for
 * later replay onto slaves, and register it. Allocation-failure
 * return is elided here.
 */
2197 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2198 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2199 const struct rte_eth_txconf *tx_conf)
2201 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2202 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2203 0, dev->data->numa_node);
2205 if (bd_tx_q == NULL)
2208 bd_tx_q->queue_id = tx_queue_id;
2209 bd_tx_q->dev_private = dev->data->dev_private;
2211 bd_tx_q->nb_tx_desc = nb_tx_desc;
2212 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2214 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
/* rx_queue_release op; body elided in this extract (presumably
 * rte_free(queue) with a NULL guard - confirm against full source). */
2220 bond_ethdev_rx_queue_release(void *queue)
/* tx_queue_release op; body elided in this extract (presumably
 * rte_free(queue) with a NULL guard - confirm against full source). */
2229 bond_ethdev_tx_queue_release(void *queue)
/*
 * Periodic alarm callback that polls link status for slaves without
 * LSC interrupt support. Skips the scan (but keeps the alarm alive)
 * if the internals lock is contended; fires the LSC callback for any
 * slave whose status changed; re-arms itself while at least one
 * polled slave remains.
 */
2238 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2240 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2241 struct bond_dev_private *internals;
2243 /* Default value for polling slave found is true as we don't want to
2244 * disable the polling thread if we cannot get the lock */
2245 int i, polling_slave_found = 1;
2250 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2251 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
/* Stop re-arming once the device is stopped or polling disabled */
2253 if (!bonded_ethdev->data->dev_started ||
2254 !internals->link_status_polling_enabled)
2257 /* If device is currently being configured then don't check slaves link
2258 * status, wait until next period */
2259 if (rte_spinlock_trylock(&internals->lock)) {
2260 if (internals->slave_count > 0)
2261 polling_slave_found = 0;
2263 for (i = 0; i < internals->slave_count; i++) {
2264 if (!internals->slaves[i].link_status_poll_enabled)
2267 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2268 polling_slave_found = 1;
2270 /* Update slave link status */
2271 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2272 internals->slaves[i].link_status_wait_to_complete);
2274 /* if link status has changed since last checked then call lsc
2276 if (slave_ethdev->data->dev_link.link_status !=
2277 internals->slaves[i].last_link_status) {
2278 internals->slaves[i].last_link_status =
2279 slave_ethdev->data->dev_link.link_status;
2281 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2282 RTE_ETH_EVENT_INTR_LSC,
2283 &bonded_ethdev->data->port_id,
2287 rte_spinlock_unlock(&internals->lock);
2290 if (polling_slave_found)
2291 /* Set alarm to continue monitoring link status of slave ethdev's */
2292 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2293 bond_ethdev_slave_link_status_change_monitor, cb_arg);
/*
 * link_update op: report the bond as DOWN when stopped or slave-less;
 * otherwise UP with a mode-dependent speed: BROADCAST = minimum slave
 * speed, ACTIVE_BACKUP = primary's speed, the aggregating modes
 * (RR/BALANCE/TLB/ALB and 8023AD's speed) = sum of slave speeds, with
 * mode 4 additionally reporting the recorded autoneg/duplex.
 */
2297 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2299 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2301 struct bond_dev_private *bond_ctx;
2302 struct rte_eth_link slave_link;
2306 bond_ctx = ethdev->data->dev_private;
2308 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2310 if (ethdev->data->dev_started == 0 ||
2311 bond_ctx->active_slave_count == 0) {
2312 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2316 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2318 if (wait_to_complete)
2319 link_update = rte_eth_link_get;
2321 link_update = rte_eth_link_get_nowait;
2323 switch (bond_ctx->mode) {
2324 case BONDING_MODE_BROADCAST:
2326 * Setting link speed to UINT32_MAX to ensure we pick up the
2327 * value of the first active slave
2329 ethdev->data->dev_link.link_speed = UINT32_MAX;
2332 * link speed is minimum value of all the slaves link speed as
2333 * packet loss will occur on this slave if transmission at rates
2334 * greater than this are attempted
2336 for (idx = 1; idx < bond_ctx->active_slave_count; idx++) {
/* NOTE(review): this queries active_slaves[0] on every iteration
 * (and starts at idx 1), so only the first slave's speed is ever
 * examined; upstream DPDK fixed this to use active_slaves[idx] -
 * confirm against the full source. */
2337 link_update(bond_ctx->active_slaves[0], &slave_link);
2339 if (slave_link.link_speed <
2340 ethdev->data->dev_link.link_speed)
2341 ethdev->data->dev_link.link_speed =
2342 slave_link.link_speed;
2345 case BONDING_MODE_ACTIVE_BACKUP:
2346 /* Current primary slave */
2347 link_update(bond_ctx->current_primary_port, &slave_link);
2349 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2351 case BONDING_MODE_8023AD:
2352 ethdev->data->dev_link.link_autoneg =
2353 bond_ctx->mode4.slave_link.link_autoneg;
2354 ethdev->data->dev_link.link_duplex =
2355 bond_ctx->mode4.slave_link.link_duplex;
2356 /* fall through to update link speed */
2357 case BONDING_MODE_ROUND_ROBIN:
2358 case BONDING_MODE_BALANCE:
2359 case BONDING_MODE_TLB:
2360 case BONDING_MODE_ALB:
2363 * In theses mode the maximum theoretical link speed is the sum
2366 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2368 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2369 link_update(bond_ctx->active_slaves[idx], &slave_link);
2371 ethdev->data->dev_link.link_speed +=
2372 slave_link.link_speed;
2382 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2384 struct bond_dev_private *internals = dev->data->dev_private;
2385 struct rte_eth_stats slave_stats;
2388 for (i = 0; i < internals->slave_count; i++) {
2389 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2391 stats->ipackets += slave_stats.ipackets;
2392 stats->opackets += slave_stats.opackets;
2393 stats->ibytes += slave_stats.ibytes;
2394 stats->obytes += slave_stats.obytes;
2395 stats->imissed += slave_stats.imissed;
2396 stats->ierrors += slave_stats.ierrors;
2397 stats->oerrors += slave_stats.oerrors;
2398 stats->rx_nombuf += slave_stats.rx_nombuf;
2400 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2401 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2402 stats->q_opackets[j] += slave_stats.q_opackets[j];
2403 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2404 stats->q_obytes[j] += slave_stats.q_obytes[j];
2405 stats->q_errors[j] += slave_stats.q_errors[j];
2414 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2416 struct bond_dev_private *internals = dev->data->dev_private;
2419 for (i = 0; i < internals->slave_count; i++)
2420 rte_eth_stats_reset(internals->slaves[i].port_id);
2424 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2426 struct bond_dev_private *internals = eth_dev->data->dev_private;
2429 internals->promiscuous_en = 1;
2431 switch (internals->mode) {
2432 /* Promiscuous mode is propagated to all slaves */
2433 case BONDING_MODE_ROUND_ROBIN:
2434 case BONDING_MODE_BALANCE:
2435 case BONDING_MODE_BROADCAST:
2436 for (i = 0; i < internals->slave_count; i++)
2437 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2439 /* In mode4 promiscus mode is managed when slave is added/removed */
2440 case BONDING_MODE_8023AD:
2442 /* Promiscuous mode is propagated only to primary slave */
2443 case BONDING_MODE_ACTIVE_BACKUP:
2444 case BONDING_MODE_TLB:
2445 case BONDING_MODE_ALB:
2447 rte_eth_promiscuous_enable(internals->current_primary_port);
2452 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2454 struct bond_dev_private *internals = dev->data->dev_private;
2457 internals->promiscuous_en = 0;
2459 switch (internals->mode) {
2460 /* Promiscuous mode is propagated to all slaves */
2461 case BONDING_MODE_ROUND_ROBIN:
2462 case BONDING_MODE_BALANCE:
2463 case BONDING_MODE_BROADCAST:
2464 for (i = 0; i < internals->slave_count; i++)
2465 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2467 /* In mode4 promiscus mode is set managed when slave is added/removed */
2468 case BONDING_MODE_8023AD:
2470 /* Promiscuous mode is propagated only to primary slave */
2471 case BONDING_MODE_ACTIVE_BACKUP:
2472 case BONDING_MODE_TLB:
2473 case BONDING_MODE_ALB:
2475 rte_eth_promiscuous_disable(internals->current_primary_port);
2480 bond_ethdev_delayed_lsc_propagation(void *arg)
2485 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2486 RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
/*
 * LSC (link status change) callback registered on each slave port.
 * Activates/deactivates the slave in the bond, updates the primary port and
 * bonded link properties, and (re)propagates the LSC event upward — either
 * immediately or via a delayed alarm when up/down delays are configured.
 * NOTE(review): several early-return and flag-handling lines appear to have
 * been elided from this view; the control flow below is incomplete as shown.
 */
2490 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2491 void *param, void *ret_param __rte_unused)
2493 struct rte_eth_dev *bonded_eth_dev;
2494 struct bond_dev_private *internals;
2495 struct rte_eth_link link;
2498 int i, valid_slave = 0;
2500 uint8_t lsc_flag = 0;
/* Only handle LSC events; 'param' carries the bonded port id. */
2502 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2505 bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2507 if (check_for_bonded_ethdev(bonded_eth_dev))
2510 internals = bonded_eth_dev->data->dev_private;
2512 /* If the device isn't started don't handle interrupts */
2513 if (!bonded_eth_dev->data->dev_started)
2516 /* verify that port_id is a valid slave of bonded port */
2517 for (i = 0; i < internals->slave_count; i++) {
2518 if (internals->slaves[i].port_id == port_id) {
2527 /* Synchronize lsc callback parallel calls either by real link event
2528 * from the slaves PMDs or by the bonding PMD itself.
2530 rte_spinlock_lock(&internals->lsc_lock);
2532 /* Search for port in active port list */
2533 active_pos = find_slave_by_id(internals->active_slaves,
2534 internals->active_slave_count, port_id);
2536 rte_eth_link_get_nowait(port_id, &link);
2537 if (link.link_status) {
/* Slave came up: nothing to do if it is already active. */
2538 if (active_pos < internals->active_slave_count) {
2539 rte_spinlock_unlock(&internals->lsc_lock);
2543 /* if no active slave ports then set this port to be primary port */
2544 if (internals->active_slave_count < 1) {
2545 /* If first active slave, then change link status */
2546 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2547 internals->current_primary_port = port_id;
2550 mac_address_slaves_update(bonded_eth_dev);
2553 activate_slave(bonded_eth_dev, port_id);
2555 /* If user has defined the primary port then default to using it */
2556 if (internals->user_defined_primary_port &&
2557 internals->primary_port == port_id)
2558 bond_ethdev_primary_set(internals, port_id);
/* Slave went down: nothing to do if it was not active. */
2560 if (active_pos == internals->active_slave_count) {
2561 rte_spinlock_unlock(&internals->lsc_lock);
2565 /* Remove from active slave list */
2566 deactivate_slave(bonded_eth_dev, port_id);
2568 if (internals->active_slave_count < 1)
2571 /* Update primary id, take first active slave from list or if none
2572 * available set to -1 */
2573 if (port_id == internals->current_primary_port) {
2574 if (internals->active_slave_count > 0)
2575 bond_ethdev_primary_set(internals,
2576 internals->active_slaves[0]);
2578 internals->current_primary_port = internals->primary_port;
2583 * Update bonded device link properties after any change to active
2586 bond_ethdev_link_update(bonded_eth_dev, 0);
2589 /* Cancel any possible outstanding interrupts if delays are enabled */
2590 if (internals->link_up_delay_ms > 0 ||
2591 internals->link_down_delay_ms > 0)
2592 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
/* Propagate the new state: defer via alarm when a delay is configured,
 * otherwise fire the callback immediately. */
2595 if (bonded_eth_dev->data->dev_link.link_status) {
2596 if (internals->link_up_delay_ms > 0)
2597 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2598 bond_ethdev_delayed_lsc_propagation,
2599 (void *)bonded_eth_dev);
2601 _rte_eth_dev_callback_process(bonded_eth_dev,
2602 RTE_ETH_EVENT_INTR_LSC,
2606 if (internals->link_down_delay_ms > 0)
2607 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2608 bond_ethdev_delayed_lsc_propagation,
2609 (void *)bonded_eth_dev);
2611 _rte_eth_dev_callback_process(bonded_eth_dev,
2612 RTE_ETH_EVENT_INTR_LSC,
2617 rte_spinlock_unlock(&internals->lsc_lock);
2623 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2624 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2628 int slave_reta_size;
2629 unsigned reta_count;
2630 struct bond_dev_private *internals = dev->data->dev_private;
2632 if (reta_size != internals->reta_size)
2635 /* Copy RETA table */
2636 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2638 for (i = 0; i < reta_count; i++) {
2639 internals->reta_conf[i].mask = reta_conf[i].mask;
2640 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2641 if ((reta_conf[i].mask >> j) & 0x01)
2642 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2645 /* Fill rest of array */
2646 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2647 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2648 sizeof(internals->reta_conf[0]) * reta_count);
2650 /* Propagate RETA over slaves */
2651 for (i = 0; i < internals->slave_count; i++) {
2652 slave_reta_size = internals->slaves[i].reta_size;
2653 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2654 &internals->reta_conf[0], slave_reta_size);
2663 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2664 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2667 struct bond_dev_private *internals = dev->data->dev_private;
2669 if (reta_size != internals->reta_size)
2672 /* Copy RETA table */
2673 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2674 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2675 if ((reta_conf[i].mask >> j) & 0x01)
2676 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2682 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2683 struct rte_eth_rss_conf *rss_conf)
2686 struct bond_dev_private *internals = dev->data->dev_private;
2687 struct rte_eth_rss_conf bond_rss_conf;
2689 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2691 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2693 if (bond_rss_conf.rss_hf != 0)
2694 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2696 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2697 sizeof(internals->rss_key)) {
2698 if (bond_rss_conf.rss_key_len == 0)
2699 bond_rss_conf.rss_key_len = 40;
2700 internals->rss_key_len = bond_rss_conf.rss_key_len;
2701 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2702 internals->rss_key_len);
2705 for (i = 0; i < internals->slave_count; i++) {
2706 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2716 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2717 struct rte_eth_rss_conf *rss_conf)
2719 struct bond_dev_private *internals = dev->data->dev_private;
2721 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2722 rss_conf->rss_key_len = internals->rss_key_len;
2723 if (rss_conf->rss_key)
2724 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
/* eth_dev_ops vtable wiring the bonding PMD callbacks into the ethdev API;
 * each handler is implemented in this file. */
2729 const struct eth_dev_ops default_dev_ops = {
2730 .dev_start = bond_ethdev_start,
2731 .dev_stop = bond_ethdev_stop,
2732 .dev_close = bond_ethdev_close,
2733 .dev_configure = bond_ethdev_configure,
2734 .dev_infos_get = bond_ethdev_info,
2735 .vlan_filter_set = bond_ethdev_vlan_filter_set,
2736 .rx_queue_setup = bond_ethdev_rx_queue_setup,
2737 .tx_queue_setup = bond_ethdev_tx_queue_setup,
2738 .rx_queue_release = bond_ethdev_rx_queue_release,
2739 .tx_queue_release = bond_ethdev_tx_queue_release,
2740 .link_update = bond_ethdev_link_update,
2741 .stats_get = bond_ethdev_stats_get,
2742 .stats_reset = bond_ethdev_stats_reset,
2743 .promiscuous_enable = bond_ethdev_promiscuous_enable,
2744 .promiscuous_disable = bond_ethdev_promiscuous_disable,
2745 .reta_update = bond_ethdev_rss_reta_update,
2746 .reta_query = bond_ethdev_rss_reta_query,
2747 .rss_hash_update = bond_ethdev_rss_hash_update,
2748 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get
/*
 * Allocate and initialize a bonded ethdev for the given vdev in the given
 * bonding mode. Initializes private state (locks, default policies, polling
 * intervals), sets the mode-4 defaults, applies the requested mode, and
 * allocates the VLAN-filter bitmap. Returns the new port id on success.
 * NOTE(review): the error-path jumps/returns appear elided from this view;
 * the trailing rte_free() calls look like the shared error-cleanup tail.
 */
2752 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2754 const char *name = rte_vdev_device_name(dev);
2755 uint8_t socket_id = dev->device.numa_node;
2756 struct bond_dev_private *internals = NULL;
2757 struct rte_eth_dev *eth_dev = NULL;
2758 uint32_t vlan_filter_bmp_size;
2760 /* now do all data allocation - for eth_dev structure, dummy pci driver
2761 * and internal (private) data
2764 /* reserve an ethdev entry */
2765 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2766 if (eth_dev == NULL) {
2767 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2771 internals = eth_dev->data->dev_private;
2772 eth_dev->data->nb_rx_queues = (uint16_t)1;
2773 eth_dev->data->nb_tx_queues = (uint16_t)1;
/* MAC storage for the bonded device itself. */
2775 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2777 if (eth_dev->data->mac_addrs == NULL) {
2778 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2782 eth_dev->dev_ops = &default_dev_ops;
2783 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2785 rte_spinlock_init(&internals->lock);
2786 rte_spinlock_init(&internals->lsc_lock);
/* Default private state: no mode selected yet, L2 balance policy,
 * polling/link delays at their defaults, no slaves attached. */
2788 internals->port_id = eth_dev->data->port_id;
2789 internals->mode = BONDING_MODE_INVALID;
2790 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2791 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2792 internals->xmit_hash = xmit_l2_hash;
2793 internals->user_defined_mac = 0;
2795 internals->link_status_polling_enabled = 0;
2797 internals->link_status_polling_interval_ms =
2798 DEFAULT_POLLING_INTERVAL_10_MS;
2799 internals->link_down_delay_ms = 0;
2800 internals->link_up_delay_ms = 0;
2802 internals->slave_count = 0;
2803 internals->active_slave_count = 0;
2804 internals->rx_offload_capa = 0;
2805 internals->tx_offload_capa = 0;
2806 internals->candidate_max_rx_pktlen = 0;
2807 internals->max_rx_pktlen = 0;
2809 /* Initially allow to choose any offload type */
2810 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2812 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2813 memset(internals->slaves, 0, sizeof(internals->slaves));
2815 /* Set mode 4 default configuration */
2816 bond_mode_8023ad_setup(eth_dev, NULL);
2817 if (bond_ethdev_mode_set(eth_dev, mode)) {
2818 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d",
2819 eth_dev->data->port_id, mode);
/* One bit per possible VLAN id for the software VLAN filter. */
2823 vlan_filter_bmp_size =
2824 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2825 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2826 RTE_CACHE_LINE_SIZE);
2827 if (internals->vlan_filter_bmpmem == NULL) {
2829 "Failed to allocate vlan bitmap for bonded device %u\n",
2830 eth_dev->data->port_id);
2834 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2835 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2836 if (internals->vlan_filter_bmp == NULL) {
2838 "Failed to init vlan bitmap for bonded device %u\n",
2839 eth_dev->data->port_id);
2840 rte_free(internals->vlan_filter_bmpmem);
2844 return eth_dev->data->port_id;
/* Error cleanup: release everything allocated above. */
2847 rte_free(internals);
2848 if (eth_dev != NULL) {
2849 rte_free(eth_dev->data->mac_addrs);
2850 rte_eth_dev_release_port(eth_dev);
/*
 * vdev probe entry point: parse the device kvargs (mode, socket id,
 * aggregator mode), create the bonded ethdev via bond_alloc(), and stash the
 * kvargs for later use by bond_ethdev_configure().
 * NOTE(review): several early-return/goto-cleanup lines appear elided from
 * this view; rte_kvargs_free() at the end looks like the error path.
 */
2856 bond_probe(struct rte_vdev_device *dev)
2859 struct bond_dev_private *internals;
2860 struct rte_kvargs *kvlist;
2861 uint8_t bonding_mode, socket_id/*, agg_mode*/;
2862 int arg_count, port_id;
2868 name = rte_vdev_device_name(dev);
2869 RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2871 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2872 pmd_bond_init_valid_arguments);
2876 /* Parse link bonding mode */
2877 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2878 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2879 &bond_ethdev_parse_slave_mode_kvarg,
2880 &bonding_mode) != 0) {
2881 RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2886 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2887 "device %s\n", name);
2891 /* Parse socket id to create bonding device on */
2892 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2893 if (arg_count == 1) {
2894 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2895 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2897 RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2898 "bonded device %s\n", name);
2901 } else if (arg_count > 1) {
2902 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2903 "bonded device %s\n", name);
/* No socket id supplied: fall back to the caller's socket. */
2906 socket_id = rte_socket_id();
2909 dev->device.numa_node = socket_id;
2911 /* Create link bonding eth device */
2912 port_id = bond_alloc(dev, bonding_mode);
2914 RTE_LOG(ERR, EAL, "Failed to create socket %s in mode %u on "
2915 "socket %u.\n", name, bonding_mode, socket_id);
/* Keep the kvargs so bond_ethdev_configure() can finish parsing them. */
2918 internals = rte_eth_devices[port_id].data->dev_private;
2919 internals->kvlist = kvlist;
2922 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2923 if (rte_kvargs_process(kvlist,
2924 PMD_BOND_AGG_MODE_KVARG,
2925 &bond_ethdev_parse_slave_agg_mode_kvarg,
2928 "Failed to parse agg selection mode for bonded device %s\n",
2933 if (internals->mode == BONDING_MODE_8023AD)
2934 rte_eth_bond_8023ad_agg_selection_set(port_id,
/* Default aggregator selection when none was requested. */
2937 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2940 RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2941 "socket %u.\n", name, port_id, bonding_mode, socket_id);
2945 rte_kvargs_free(kvlist);
2951 bond_remove(struct rte_vdev_device *dev)
2953 struct rte_eth_dev *eth_dev;
2954 struct bond_dev_private *internals;
2960 name = rte_vdev_device_name(dev);
2961 RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2963 /* now free all data allocation - for eth_dev structure,
2964 * dummy pci driver and internal (private) data
2967 /* find an ethdev entry */
2968 eth_dev = rte_eth_dev_allocated(name);
2969 if (eth_dev == NULL)
2972 RTE_ASSERT(eth_dev->device == &dev->device);
2974 internals = eth_dev->data->dev_private;
2975 if (internals->slave_count != 0)
2978 if (eth_dev->data->dev_started == 1) {
2979 bond_ethdev_stop(eth_dev);
2980 bond_ethdev_close(eth_dev);
2983 eth_dev->dev_ops = NULL;
2984 eth_dev->rx_pkt_burst = NULL;
2985 eth_dev->tx_pkt_burst = NULL;
2987 internals = eth_dev->data->dev_private;
2988 /* Try to release mempool used in mode6. If the bond
2989 * device is not mode6, free the NULL is not problem.
2991 rte_mempool_free(internals->mode6.mempool);
2992 rte_bitmap_free(internals->vlan_filter_bmp);
2993 rte_free(internals->vlan_filter_bmpmem);
2994 rte_free(eth_dev->data->dev_private);
2995 rte_free(eth_dev->data->mac_addrs);
2997 rte_eth_dev_release_port(eth_dev);
/*
 * dev_configure callback: finishes device setup once all pdev/vdev ports
 * exist — seeds the default RSS key/RETA when RSS is enabled, then parses the
 * kvargs saved by bond_probe() (MAC address, xmit policy, aggregator mode,
 * slave ports, primary slave, LSC polling interval, link up/down delays) and
 * applies each via the public rte_eth_bond_* API.
 * NOTE(review): the error-return lines of the kvargs branches appear elided
 * from this view.
 */
3002 /* this part will resolve the slave portids after all the other pdev and vdev
3003 * have been allocated */
3005 bond_ethdev_configure(struct rte_eth_dev *dev)
3007 const char *name = dev->device->name;
3008 struct bond_dev_private *internals = dev->data->dev_private;
3009 struct rte_kvargs *kvlist = internals->kvlist;
3011 uint16_t port_id = dev - rte_eth_devices;
/* Well-known default Toeplitz RSS key (40 bytes). */
3014 static const uint8_t default_rss_key[40] = {
3015 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3016 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3017 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3018 0xBE, 0xAC, 0x01, 0xFA
3023 /* If RSS is enabled, fill table and key with default values */
3024 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3025 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3026 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3027 memcpy(internals->rss_key, default_rss_key, 40);
/* Round-robin the RETA entries over the configured RX queues. */
3029 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3030 internals->reta_conf[i].mask = ~0LL;
3031 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3032 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3036 /* set the max_rx_pktlen */
3037 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3040 * if no kvlist, it means that this bonded device has been created
3041 * through the bonding api.
3046 /* Parse MAC address for bonded device */
3047 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3048 if (arg_count == 1) {
3049 struct ether_addr bond_mac;
3051 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3052 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3053 RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3058 /* Set MAC address */
3059 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3061 "Failed to set mac address on bonded device %s\n",
3065 } else if (arg_count > 1) {
3067 "MAC address can be specified only once for bonded device %s\n",
3072 /* Parse/set balance mode transmit policy */
3073 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3074 if (arg_count == 1) {
3075 uint8_t xmit_policy;
3077 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3078 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3081 "Invalid xmit policy specified for bonded device %s\n",
3086 /* Set balance mode transmit policy*/
3087 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3089 "Failed to set balance xmit policy on bonded device %s\n",
3093 } else if (arg_count > 1) {
3095 "Transmit policy can be specified only once for bonded device"
/* Parse/set 802.3ad aggregator selection mode (mode 4 only). */
3100 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3101 if (rte_kvargs_process(kvlist,
3102 PMD_BOND_AGG_MODE_KVARG,
3103 &bond_ethdev_parse_slave_agg_mode_kvarg,
3106 "Failed to parse agg selection mode for bonded device %s\n",
3109 if (internals->mode == BONDING_MODE_8023AD)
3110 rte_eth_bond_8023ad_agg_selection_set(port_id,
3114 /* Parse/add slave ports to bonded device */
3115 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3116 struct bond_ethdev_slave_ports slave_ports;
3119 memset(&slave_ports, 0, sizeof(slave_ports));
3121 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3122 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3124 "Failed to parse slave ports for bonded device %s\n",
3129 for (i = 0; i < slave_ports.slave_count; i++) {
3130 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3132 "Failed to add port %d as slave to bonded device %s\n",
3133 slave_ports.slaves[i], name);
3138 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3142 /* Parse/set primary slave port id*/
3143 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3144 if (arg_count == 1) {
3145 uint16_t primary_slave_port_id;
3147 if (rte_kvargs_process(kvlist,
3148 PMD_BOND_PRIMARY_SLAVE_KVARG,
3149 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3150 &primary_slave_port_id) < 0) {
3152 "Invalid primary slave port id specified for bonded device"
/* NOTE(review): comment below looks copy-pasted from the xmit-policy
 * branch — this call sets the primary slave, not the xmit policy. */
3157 /* Set balance mode transmit policy*/
3158 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3161 "Failed to set primary slave port %d on bonded device %s\n",
3162 primary_slave_port_id, name);
3165 } else if (arg_count > 1) {
3167 "Primary slave can be specified only once for bonded device"
3172 /* Parse link status monitor polling interval */
3173 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3174 if (arg_count == 1) {
3175 uint32_t lsc_poll_interval_ms;
3177 if (rte_kvargs_process(kvlist,
3178 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3179 &bond_ethdev_parse_time_ms_kvarg,
3180 &lsc_poll_interval_ms) < 0) {
3182 "Invalid lsc polling interval value specified for bonded"
3183 " device %s\n", name);
3187 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3190 "Failed to set lsc monitor polling interval (%u ms) on"
3191 " bonded device %s\n", lsc_poll_interval_ms, name);
3194 } else if (arg_count > 1) {
3196 "LSC polling interval can be specified only once for bonded"
3197 " device %s\n", name);
3201 /* Parse link up interrupt propagation delay */
3202 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3203 if (arg_count == 1) {
3204 uint32_t link_up_delay_ms;
3206 if (rte_kvargs_process(kvlist,
3207 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3208 &bond_ethdev_parse_time_ms_kvarg,
3209 &link_up_delay_ms) < 0) {
3211 "Invalid link up propagation delay value specified for"
3212 " bonded device %s\n", name);
/* NOTE(review): copy-pasted comment — this sets the link-up delay. */
3216 /* Set balance mode transmit policy*/
3217 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3220 "Failed to set link up propagation delay (%u ms) on bonded"
3221 " device %s\n", link_up_delay_ms, name);
3224 } else if (arg_count > 1) {
3226 "Link up propagation delay can be specified only once for"
3227 " bonded device %s\n", name);
3231 /* Parse link down interrupt propagation delay */
3232 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3233 if (arg_count == 1) {
3234 uint32_t link_down_delay_ms;
3236 if (rte_kvargs_process(kvlist,
3237 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3238 &bond_ethdev_parse_time_ms_kvarg,
3239 &link_down_delay_ms) < 0) {
3241 "Invalid link down propagation delay value specified for"
3242 " bonded device %s\n", name);
/* NOTE(review): copy-pasted comment — this sets the link-down delay. */
3246 /* Set balance mode transmit policy*/
3247 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3250 "Failed to set link down propagation delay (%u ms) on"
3251 " bonded device %s\n", link_down_delay_ms, name);
3254 } else if (arg_count > 1) {
3256 "Link down propagation delay can be specified only once for"
3257 " bonded device %s\n", name);
/* vdev driver registration: probe/remove hooks, the 'net_bonding' driver
 * name (with legacy 'eth_bond' alias), and the accepted kvargs string.
 * NOTE(review): some kvargs lines (e.g. mode=, slave=) appear elided from
 * the parameter string in this view. */
3264 struct rte_vdev_driver pmd_bond_drv = {
3265 .probe = bond_probe,
3266 .remove = bond_remove,
3269 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3270 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3272 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3276 "xmit_policy=[l2 | l23 | l34] "
3277 "agg_mode=[count | stable | bandwidth] "
3280 "lsc_poll_period_ms=<int> "
3282 "down_delay=<int>");