/* deb_dpdk: drivers/net/bonding/rte_eth_bond_pmd.c (new upstream version 17.11.4) */
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

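/*
 * Editor's note: XOR of the L4 source and destination ports. The
 * src_port/dst_port fields sit at the same offsets in struct tcp_hdr and
 * struct udp_hdr, so the same macro serves both protocols.
 */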
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

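/*
 * Editor's note: returns the number of bytes of VLAN header(s) following the
 * Ethernet header, handling up to two stacked tags (QinQ), and advances
 * *proto to the encapsulated EtherType (big endian).
 */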
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

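/*
 * Editor's note: a frame is treated as a slow protocol (LACPDU or marker)
 * packet only when it carries no VLAN tag and its EtherType is
 * ETHER_TYPE_SLOW (0x8809).
 */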
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

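/*
 * Editor's note: the pattern below matches any Ethernet frame whose
 * EtherType equals ETHER_TYPE_SLOW (the mask covers only the type field), so
 * that slave NICs can steer LACP control traffic to a dedicated queue in
 * hardware.
 */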
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities do not allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}
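
/*
 * Editor's note, illustrative usage (not part of this file): an application
 * would opt in to hardware filtering of slow packets before starting the
 * bonded port, via the public API declared in rte_eth_bond_8023ad.h; the
 * exact call shown here is an assumption based on that header:
 *
 *	ret = rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id);
 *	if (ret != 0)
 *		printf("falling back to software LACP filtering\n");
 *
 * Enabling is expected to succeed only when the check above passes for
 * every slave.
 */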

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

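/*
 * Editor's note: mode 4 receive path used when dedicated queues are enabled.
 * Slow packets are already filtered out by the rte_flow rule installed
 * above, so this is a plain round-robin poll over the active slaves,
 * resuming from where the previous burst left off.
 */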
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, op_slave_idx;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        if (unlikely(nb_pkts == 0))
                return 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
                        num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];
                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i],
                                        distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave.
                         * Use only slaves that are currently distributing.
                         */
                        uint8_t slave_offset =
                                        distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
                                        bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                num_tx_total += num_tx_slave;
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
                                        num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

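/*
 * Editor's note: mode 4 receive path without dedicated queues. Slow protocol
 * frames must be picked out of the data stream in software and handed to the
 * 802.3ad state machine, and frames not addressed to the bonded interface
 * are dropped unless promiscuous mode is on.
 */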
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonding interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* increment current slave index so the next call to tx burst starts on the
         * next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

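/*
 * Editor's note: transmit hash policies. xmit_l2_hash balances on Ethernet
 * addresses only, xmit_l23_hash folds in the IPv4/IPv6 addresses, and
 * xmit_l34_hash also mixes in the TCP/UDP ports, mirroring the layer2,
 * layer2+3 and layer3+4 policies of the Linux bonding driver. Each reduces
 * its hash modulo slave_count, so a given flow always maps to the same slave
 * while the slave set is stable.
 */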
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in a fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

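/*
 * Editor's note: estimates how much of a slave's link bandwidth is still
 * unused in the current reorder window, expressed as an integer quotient
 * plus remainder so that bandwidth_cmp() can order slaves without floating
 * point.
 */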
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

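/*
 * Editor's note: mode 5 (TLB) transmit. Slaves are tried in the order
 * computed by the bandwidth callback above (least loaded first), and the
 * source MAC of each outgoing frame is rewritten from the primary's address
 * to that of the slave actually transmitting it.
 */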
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the slave mbuf arrays with the packets to be sent on each slave */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate room for additional packets in case of 8023AD mode. */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
                                NULL);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

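/*
 * Editor's note: broadcast mode transmits every mbuf on every active slave,
 * so each mbuf's reference count is bumped by (slave count - 1) up front; on
 * partial failure only the most successful slave's count is reported to the
 * caller and the surplus references are released here.
 */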
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}
1399
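/* Record the link properties (speed/duplex/autoneg) advertised by the bonded
 * device. In 802.3ad mode the first slave's properties become the reference
 * that every subsequently added slave is checked against in
 * link_properties_valid(). */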
1400 void
1401 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1402 {
1403         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1404
1405         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1406                 /**
1407                  * If in mode 4 then save the link properties of the first
1408                  * slave; all subsequent slaves must match these properties
1409                  */
1410                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1411
1412                 bond_link->link_autoneg = slave_link->link_autoneg;
1413                 bond_link->link_duplex = slave_link->link_duplex;
1414                 bond_link->link_speed = slave_link->link_speed;
1415         } else {
1416                 /**
1417                  * In any other mode the link properties are set to default
1418                  * values of AUTONEG/DUPLEX
1419                  */
1420                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1421                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1422         }
1423 }
1424
1425 int
1426 link_properties_valid(struct rte_eth_dev *ethdev,
1427                 struct rte_eth_link *slave_link)
1428 {
1429         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1430
1431         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1432                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1433
1434                 if (bond_link->link_duplex != slave_link->link_duplex ||
1435                         bond_link->link_autoneg != slave_link->link_autoneg ||
1436                         bond_link->link_speed != slave_link->link_speed)
1437                         return -1;
1438         }
1439
1440         return 0;
1441 }
1442
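/* Helper to read a device's primary MAC address. Illustrative usage, a
 * sketch only (`port_id` and `use_mac` are placeholders, not part of this
 * driver):
 *
 *	struct ether_addr mac;
 *	if (mac_address_get(&rte_eth_devices[port_id], &mac) == 0)
 *		use_mac(&mac);
 */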
1443 int
1444 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1445 {
1446         struct ether_addr *mac_addr;
1447
1448         if (eth_dev == NULL) {
1449                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1450                 return -1;
1451         }
1452
1453         if (dst_mac_addr == NULL) {
1454                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1455                 return -1;
1456         }
1457
1458         mac_addr = eth_dev->data->mac_addrs;
1459
1460         ether_addr_copy(mac_addr, dst_mac_addr);
1461         return 0;
1462 }
1463
1464 int
1465 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1466 {
1467         struct ether_addr *mac_addr;
1468
1469         if (eth_dev == NULL) {
1470                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1471                 return -1;
1472         }
1473
1474         if (new_mac_addr == NULL) {
1475                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1476                 return -1;
1477         }
1478
1479         mac_addr = eth_dev->data->mac_addrs;
1480
1481         /* If new MAC is different to current MAC then update */
1482         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1483                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1484
1485         return 0;
1486 }
1487
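/* Propagate MAC addresses to the slaves according to the bonding mode: in
 * round-robin/balance/broadcast every slave assumes the bonded MAC, 802.3ad
 * delegates to the mode-4 state machine, and in active-backup/TLB/ALB only
 * the current primary carries the bonded MAC while the remaining slaves keep
 * their persisted (original) addresses. */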
1488 int
1489 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1490 {
1491         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1492         int i;
1493
1494         /* Update slave devices MAC addresses */
1495         if (internals->slave_count < 1)
1496                 return -1;
1497
1498         switch (internals->mode) {
1499         case BONDING_MODE_ROUND_ROBIN:
1500         case BONDING_MODE_BALANCE:
1501         case BONDING_MODE_BROADCAST:
1502                 for (i = 0; i < internals->slave_count; i++) {
1503                         if (rte_eth_dev_default_mac_addr_set(
1504                                         internals->slaves[i].port_id,
1505                                         bonded_eth_dev->data->mac_addrs)) {
1506                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1507                                                 internals->slaves[i].port_id);
1508                                 return -1;
1509                         }
1510                 }
1511                 break;
1512         case BONDING_MODE_8023AD:
1513                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1514                 break;
1515         case BONDING_MODE_ACTIVE_BACKUP:
1516         case BONDING_MODE_TLB:
1517         case BONDING_MODE_ALB:
1518         default:
1519                 for (i = 0; i < internals->slave_count; i++) {
1520                         if (internals->slaves[i].port_id ==
1521                                         internals->current_primary_port) {
1522                                 if (rte_eth_dev_default_mac_addr_set(
1523                                                 internals->current_primary_port,
1524                                                 bonded_eth_dev->data->mac_addrs)) {
1525                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1526                                                         internals->current_primary_port);
1527                                         return -1;
1528                                 }
1529                         } else {
1530                                 if (rte_eth_dev_default_mac_addr_set(
1531                                                 internals->slaves[i].port_id,
1532                                                 &internals->slaves[i].persisted_mac_addr)) {
1533                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1534                                                         internals->slaves[i].port_id);
1535                                         return -1;
1536                                 }
1537                         }
1538                 }
1539         }
1540
1541         return 0;
1542 }
1543
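/* Install the RX/TX burst handlers matching the requested bonding mode.
 * This is the internal worker behind the public rte_eth_bond_mode_set() API;
 * a minimal usage sketch (bond_port_id is a placeholder):
 *
 *	if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_BALANCE) != 0)
 *		handle_error();	// hypothetical error path
 */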
1544 int
1545 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1546 {
1547         struct bond_dev_private *internals;
1548
1549         internals = eth_dev->data->dev_private;
1550
1551         switch (mode) {
1552         case BONDING_MODE_ROUND_ROBIN:
1553                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1554                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1555                 break;
1556         case BONDING_MODE_ACTIVE_BACKUP:
1557                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1558                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1559                 break;
1560         case BONDING_MODE_BALANCE:
1561                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1562                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1563                 break;
1564         case BONDING_MODE_BROADCAST:
1565                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1566                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567                 break;
1568         case BONDING_MODE_8023AD:
1569                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1570                         return -1;
1571
1572                 if (internals->mode4.dedicated_queues.enabled == 0) {
1573                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1574                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1575                         RTE_LOG(WARNING, PMD,
1576                                 "Using mode 4, it is necessary to do TX burst "
1577                                 "and RX burst at least every 100ms.\n");
1578                 } else {
1579                         /* Use flow director's optimization */
1580                         eth_dev->rx_pkt_burst =
1581                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1582                         eth_dev->tx_pkt_burst =
1583                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1584                 }
1585                 break;
1586         case BONDING_MODE_TLB:
1587                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1588                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1589                 break;
1590         case BONDING_MODE_ALB:
1591                 if (bond_mode_alb_enable(eth_dev) != 0)
1592                         return -1;
1593
1594                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1595                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1596                 break;
1597         default:
1598                 return -1;
1599         }
1600
1601         internals->mode = mode;
1602
1603         return 0;
1604 }
1605
1606
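/* Set up the per-slave "slow" path used by 802.3ad: a dedicated mempool for
 * LACPDUs and, when dedicated queues are enabled, an extra RX/TX queue pair
 * (beyond the data queues) reserved for control traffic so the data path
 * does not have to filter LACP frames. */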
1607 static int
1608 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1609                 struct rte_eth_dev *slave_eth_dev)
1610 {
1611         int errval = 0;
1612         struct bond_dev_private *internals = (struct bond_dev_private *)
1613                 bonded_eth_dev->data->dev_private;
1614         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1615
1616         if (port->slow_pool == NULL) {
1617                 char mem_name[256];
1618                 int slave_id = slave_eth_dev->data->port_id;
1619
1620                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1621                                 slave_id);
1622                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1623                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1624                         slave_eth_dev->data->numa_node);
1625
1626                 /* Any memory allocation failure in initialization is critical because
1627                  * resources can't be freed, so reinitialization is impossible. */
1628                 if (port->slow_pool == NULL) {
1629                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1630                                 slave_id, mem_name, rte_strerror(rte_errno));
1631                 }
1632         }
1633
1634         if (internals->mode4.dedicated_queues.enabled == 1) {
1635                 /* Configure slow Rx queue */
1636
1637                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1638                                 internals->mode4.dedicated_queues.rx_qid, 128,
1639                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1640                                 NULL, port->slow_pool);
1641                 if (errval != 0) {
1642                         RTE_BOND_LOG(ERR,
1643                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1644                                         slave_eth_dev->data->port_id,
1645                                         internals->mode4.dedicated_queues.rx_qid,
1646                                         errval);
1647                         return errval;
1648                 }
1649
1650                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1651                                 internals->mode4.dedicated_queues.tx_qid, 512,
1652                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1653                                 NULL);
1654                 if (errval != 0) {
1655                         RTE_BOND_LOG(ERR,
1656                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1657                                 slave_eth_dev->data->port_id,
1658                                 internals->mode4.dedicated_queues.tx_qid,
1659                                 errval);
1660                         return errval;
1661                 }
1662         }
1663         return 0;
1664 }
1665
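/* (Re)configure a slave to mirror the bonded device: stop it, copy the
 * RSS/VLAN/interrupt configuration, create one RX/TX queue per bonded queue
 * (plus the slow queues in 802.3ad dedicated-queue mode), restart it,
 * resynchronize the RETA table and finally replay its link status. */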
1666 int
1667 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1668                 struct rte_eth_dev *slave_eth_dev)
1669 {
1670         struct bond_rx_queue *bd_rx_q;
1671         struct bond_tx_queue *bd_tx_q;
1672         uint16_t nb_rx_queues;
1673         uint16_t nb_tx_queues;
1674
1675         int errval;
1676         uint16_t q_id;
1677         struct rte_flow_error flow_error;
1678
1679         struct bond_dev_private *internals = (struct bond_dev_private *)
1680                 bonded_eth_dev->data->dev_private;
1681
1682         /* Stop slave */
1683         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1684
1685         /* Enable interrupts on slave device if supported */
1686         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1687                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1688
1689         /* If RSS is enabled for bonding, try to enable it for slaves */
1690         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1691                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1692                                 != 0) {
1693                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1694                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1695                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1696                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1697                 } else {
1698                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1699                 }
1700
1701                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1702                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1703                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1704                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1705         }
1706
1707         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1708                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1709
1710         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1711         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1712
1713         if (internals->mode == BONDING_MODE_8023AD) {
1714                 if (internals->mode4.dedicated_queues.enabled == 1) {
1715                         nb_rx_queues++;
1716                         nb_tx_queues++;
1717                 }
1718         }
1719
1720         /* Configure device */
1721         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1722                         nb_rx_queues, nb_tx_queues,
1723                         &(slave_eth_dev->data->dev_conf));
1724         if (errval != 0) {
1725                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1726                                 slave_eth_dev->data->port_id, errval);
1727                 return errval;
1728         }
1729
1730         /* Setup Rx Queues */
1731         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1732                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1733
1734                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1735                                 bd_rx_q->nb_rx_desc,
1736                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1737                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1738                 if (errval != 0) {
1739                         RTE_BOND_LOG(ERR,
1740                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1741                                         slave_eth_dev->data->port_id, q_id, errval);
1742                         return errval;
1743                 }
1744         }
1745
1746         /* Setup Tx Queues */
1747         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1748                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1749
1750                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1751                                 bd_tx_q->nb_tx_desc,
1752                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753                                 &bd_tx_q->tx_conf);
1754                 if (errval != 0) {
1755                         RTE_BOND_LOG(ERR,
1756                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1757                                 slave_eth_dev->data->port_id, q_id, errval);
1758                         return errval;
1759                 }
1760         }
1761
1762         if (internals->mode == BONDING_MODE_8023AD &&
1763                         internals->mode4.dedicated_queues.enabled == 1) {
1764                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1765                 if (errval != 0)
1766                         return errval;
1767
1768                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1769                                 slave_eth_dev->data->port_id) != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1772                                 slave_eth_dev->data->port_id);
1773                         return -1;
1774                 }
1775
1776                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1777                         rte_flow_destroy(slave_eth_dev->data->port_id,
1778                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1779                                         &flow_error);
1780
1781                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1782                                 slave_eth_dev->data->port_id);
1783         }
1784
1785         /* Start device */
1786         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1787         if (errval != 0) {
1788                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1789                                 slave_eth_dev->data->port_id, errval);
1790                 return -1;
1791         }
1792
1793         /* If RSS is enabled for bonding, synchronize RETA */
1794         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1795                 int i;
1796                 struct bond_dev_private *internals;
1797
1798                 internals = bonded_eth_dev->data->dev_private;
1799
1800                 for (i = 0; i < internals->slave_count; i++) {
1801                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1802                                 errval = rte_eth_dev_rss_reta_update(
1803                                                 slave_eth_dev->data->port_id,
1804                                                 &internals->reta_conf[0],
1805                                                 internals->slaves[i].reta_size);
1806                                 if (errval != 0) {
1807                                         RTE_LOG(WARNING, PMD,
1808                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1809                                                         " RSS Configuration for bonding may be inconsistent.\n",
1810                                                         slave_eth_dev->data->port_id, errval);
1811                                 }
1812                                 break;
1813                         }
1814                 }
1815         }
1816
1817         /* If lsc interrupt is set, check initial slave's link status */
1818         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1820                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1821                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1822                         NULL);
1823         }
1824
1825         return 0;
1826 }
1827
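/* Remove a slave from the configured slave array, compacting the array in
 * place, and reset the slave's ethdev so it must be reconfigured before it
 * can be used again. */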
1828 void
1829 slave_remove(struct bond_dev_private *internals,
1830                 struct rte_eth_dev *slave_eth_dev)
1831 {
1832         uint8_t i;
1833
1834         for (i = 0; i < internals->slave_count; i++)
1835                 if (internals->slaves[i].port_id ==
1836                                 slave_eth_dev->data->port_id)
1837                         break;
1838
1839         if (i < (internals->slave_count - 1))
1840                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1841                                 sizeof(internals->slaves[0]) *
1842                                 (internals->slave_count - i - 1));
1843
1844         internals->slave_count--;
1845
1846         /* force reconfiguration of slave interfaces */
1847         _rte_eth_dev_reset(slave_eth_dev);
1848 }
1849
1850 static void
1851 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1852
1853 void
1854 slave_add(struct bond_dev_private *internals,
1855                 struct rte_eth_dev *slave_eth_dev)
1856 {
1857         struct bond_slave_details *slave_details =
1858                         &internals->slaves[internals->slave_count];
1859
1860         slave_details->port_id = slave_eth_dev->data->port_id;
1861         slave_details->last_link_status = 0;
1862
1863         /* Mark slave devices that don't support interrupts so we can
1864          * compensate when we start the bond
1865          */
1866         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1867                 slave_details->link_status_poll_enabled = 1;
1868         }
1869
1870         slave_details->link_status_wait_to_complete = 0;
1871         /* Save the slave's current MAC so it can be restored on removal */
1872         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1873                         sizeof(struct ether_addr));
1874 }
1875
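/* Promote the given slave to primary. With no active slaves the choice is
 * recorded unconditionally; otherwise it only takes effect if the port is
 * currently in the active list. */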
1876 void
1877 bond_ethdev_primary_set(struct bond_dev_private *internals,
1878                 uint16_t slave_port_id)
1879 {
1880         int i;
1881
1882         if (internals->active_slave_count < 1)
1883                 internals->current_primary_port = slave_port_id;
1884         else
1885                 /* Search bonded device slave ports for new proposed primary port */
1886                 for (i = 0; i < internals->active_slave_count; i++) {
1887                         if (internals->active_slaves[i] == slave_port_id)
1888                                 internals->current_primary_port = slave_port_id;
1889                 }
1890 }
1891
1892 static void
1893 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1894
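/* dev_start handler for the bonded device: derive the bonded MAC from the
 * primary slave if the user did not set one, reconfigure every slave via
 * slave_configure(), arm link-status polling for slaves without LSC
 * interrupt support and start the mode-specific machinery (802.3ad state
 * machines, TLB bookkeeping). */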
1895 static int
1896 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1897 {
1898         struct bond_dev_private *internals;
1899         int i;
1900
1901         /* slave eth dev will be started by bonded device */
1902         if (check_for_bonded_ethdev(eth_dev)) {
1903                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1904                                 eth_dev->data->port_id);
1905                 return -1;
1906         }
1907
1908         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1909         eth_dev->data->dev_started = 1;
1910
1911         internals = eth_dev->data->dev_private;
1912
1913         if (internals->slave_count == 0) {
1914                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1915                 goto out_err;
1916         }
1917
1918         if (internals->user_defined_mac == 0) {
1919                 struct ether_addr *new_mac_addr = NULL;
1920
1921                 for (i = 0; i < internals->slave_count; i++)
1922                         if (internals->slaves[i].port_id == internals->primary_port)
1923                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1924
1925                 if (new_mac_addr == NULL)
1926                         goto out_err;
1927
1928                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1929                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1930                                         eth_dev->data->port_id);
1931                         goto out_err;
1932                 }
1933         }
1934
1935         /* If bonded device is configured in promiscuous mode then re-apply config */
1936         if (internals->promiscuous_en)
1937                 bond_ethdev_promiscuous_enable(eth_dev);
1938
1939         if (internals->mode == BONDING_MODE_8023AD) {
1940                 if (internals->mode4.dedicated_queues.enabled == 1) {
1941                         internals->mode4.dedicated_queues.rx_qid =
1942                                         eth_dev->data->nb_rx_queues;
1943                         internals->mode4.dedicated_queues.tx_qid =
1944                                         eth_dev->data->nb_tx_queues;
1945                 }
1946         }
1947
1948
1949         /* Reconfigure each slave device if starting bonded device */
1950         for (i = 0; i < internals->slave_count; i++) {
1951                 struct rte_eth_dev *slave_ethdev =
1952                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1953                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1954                         RTE_BOND_LOG(ERR,
1955                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1956                                 eth_dev->data->port_id,
1957                                 internals->slaves[i].port_id);
1958                         goto out_err;
1959                 }
1960                 /* We will need to poll for link status if any slave doesn't
1961                  * support interrupts
1962                  */
1963                 if (internals->slaves[i].link_status_poll_enabled)
1964                         internals->link_status_polling_enabled = 1;
1965         }
1966
1967         /* start polling if needed */
1968         if (internals->link_status_polling_enabled) {
1969                 rte_eal_alarm_set(
1970                         internals->link_status_polling_interval_ms * 1000,
1971                         bond_ethdev_slave_link_status_change_monitor,
1972                         (void *)&rte_eth_devices[internals->port_id]);
1973         }
1974
1975         /* Update all slave devices' MACs */
1976         if (mac_address_slaves_update(eth_dev) != 0)
1977                 goto out_err;
1978
1979         if (internals->user_defined_primary_port)
1980                 bond_ethdev_primary_set(internals, internals->primary_port);
1981
1982         if (internals->mode == BONDING_MODE_8023AD)
1983                 bond_mode_8023ad_start(eth_dev);
1984
1985         if (internals->mode == BONDING_MODE_TLB ||
1986                         internals->mode == BONDING_MODE_ALB)
1987                 bond_tlb_enable(internals);
1988
1989         return 0;
1990
1991 out_err:
1992         eth_dev->data->dev_started = 0;
1993         return -1;
1994 }
1995
1996 static void
1997 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1998 {
1999         uint8_t i;
2000
2001         if (dev->data->rx_queues != NULL) {
2002                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2003                         rte_free(dev->data->rx_queues[i]);
2004                         dev->data->rx_queues[i] = NULL;
2005                 }
2006                 dev->data->nb_rx_queues = 0;
2007         }
2008
2009         if (dev->data->tx_queues != NULL) {
2010                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2011                         rte_free(dev->data->tx_queues[i]);
2012                         dev->data->tx_queues[i] = NULL;
2013                 }
2014                 dev->data->nb_tx_queues = 0;
2015         }
2016 }
2017
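/* dev_stop handler: tear down mode-specific state, draining any queued
 * mode-4 control frames so their mbufs are not leaked, disable TLB
 * bookkeeping and link polling, and mark the bond link down. */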
2018 void
2019 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2020 {
2021         struct bond_dev_private *internals = eth_dev->data->dev_private;
2022         uint8_t i;
2023
2024         if (internals->mode == BONDING_MODE_8023AD) {
2025                 struct port *port;
2026                 void *pkt = NULL;
2027
2028                 bond_mode_8023ad_stop(eth_dev);
2029
2030                 /* Discard all messages to/from mode 4 state machines */
2031                 for (i = 0; i < internals->active_slave_count; i++) {
2032                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2033
2034                         RTE_ASSERT(port->rx_ring != NULL);
2035                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2036                                 rte_pktmbuf_free(pkt);
2037
2038                         RTE_ASSERT(port->tx_ring != NULL);
2039                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2040                                 rte_pktmbuf_free(pkt);
2041                 }
2042         }
2043
2044         if (internals->mode == BONDING_MODE_TLB ||
2045                         internals->mode == BONDING_MODE_ALB) {
2046                 bond_tlb_disable(internals);
2047                 for (i = 0; i < internals->active_slave_count; i++)
2048                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2049         }
2050
2051         internals->link_status_polling_enabled = 0;
2052         for (i = 0; i < internals->slave_count; i++)
2053                 internals->slaves[i].last_link_status = 0;
2054
2055         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2056         eth_dev->data->dev_started = 0;
2057 }
2058
2059 void
2060 bond_ethdev_close(struct rte_eth_dev *dev)
2061 {
2062         struct bond_dev_private *internals = dev->data->dev_private;
2063         uint8_t bond_port_id = internals->port_id;
2064         int skipped = 0;
2065
2066         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2067         while (internals->slave_count != skipped) {
2068                 uint16_t port_id = internals->slaves[skipped].port_id;
2069
2070                 rte_eth_dev_stop(port_id);
2071
2072                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2073                 RTE_LOG(ERR, PMD,
2074                                 "Failed to remove port %d from bonded device "
2075                                 "%s\n", port_id, dev->device->name);
2076                         skipped++;
2077                 }
2078         }
2079         bond_ethdev_free_queues(dev);
2080         rte_bitmap_reset(internals->vlan_filter_bmp);
2081 }
2082
2083 /* forward declaration */
2084 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2085
2086 static void
2087 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2088 {
2089         struct bond_dev_private *internals = dev->data->dev_private;
2090
2091         uint16_t max_nb_rx_queues = UINT16_MAX;
2092         uint16_t max_nb_tx_queues = UINT16_MAX;
2093
2094         dev_info->max_mac_addrs = 1;
2095
2096         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2097                         internals->candidate_max_rx_pktlen :
2098                         ETHER_MAX_JUMBO_FRAME_LEN;
2099
2100         /* Max number of tx/rx queues that the bonded device can support is the
2101          * minimum value reported across the bonded slaves, as all slaves
2102          * must be capable of supporting the same number of tx/rx queues.
2103          */
2104         if (internals->slave_count > 0) {
2105                 struct rte_eth_dev_info slave_info;
2106                 uint8_t idx;
2107
2108                 for (idx = 0; idx < internals->slave_count; idx++) {
2109                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2110                                         &slave_info);
2111
2112                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2113                                 max_nb_rx_queues = slave_info.max_rx_queues;
2114
2115                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2116                                 max_nb_tx_queues = slave_info.max_tx_queues;
2117                 }
2118         }
2119
2120         dev_info->max_rx_queues = max_nb_rx_queues;
2121         dev_info->max_tx_queues = max_nb_tx_queues;
2122
2123         /**
2124          * If dedicated hw queues enabled for link bonding device in LACP mode
2125          * then we need to reduce the maximum number of data path queues by 1.
2126          */
2127         if (internals->mode == BONDING_MODE_8023AD &&
2128                 internals->mode4.dedicated_queues.enabled == 1) {
2129                 dev_info->max_rx_queues--;
2130                 dev_info->max_tx_queues--;
2131         }
2132
2133         dev_info->min_rx_bufsize = 0;
2134
2135         dev_info->rx_offload_capa = internals->rx_offload_capa;
2136         dev_info->tx_offload_capa = internals->tx_offload_capa;
2137         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2138
2139         dev_info->reta_size = internals->reta_size;
2140 }
2141
2142 static int
2143 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2144 {
2145         int res;
2146         uint16_t i;
2147         struct bond_dev_private *internals = dev->data->dev_private;
2148
2149         /* don't do this while a slave is being added */
2150         rte_spinlock_lock(&internals->lock);
2151
2152         if (on)
2153                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2154         else
2155                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2156
2157         for (i = 0; i < internals->slave_count; i++) {
2158                 uint16_t port_id = internals->slaves[i].port_id;
2159
2160                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2161                 if (res == -ENOTSUP)
2162                         RTE_LOG(WARNING, PMD,
2163                                 "Setting VLAN filter on slave port %u not supported.\n",
2164                                 port_id);
2165         }
2166
2167         rte_spinlock_unlock(&internals->lock);
2168         return 0;
2169 }
2170
2171 static int
2172 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2173                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2174                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2175 {
2176         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2177                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2178                                         0, dev->data->numa_node);
2179         if (bd_rx_q == NULL)
2180                 return -1;
2181
2182         bd_rx_q->queue_id = rx_queue_id;
2183         bd_rx_q->dev_private = dev->data->dev_private;
2184
2185         bd_rx_q->nb_rx_desc = nb_rx_desc;
2186
2187         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2188         bd_rx_q->mb_pool = mb_pool;
2189
2190         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2191
2192         return 0;
2193 }
2194
2195 static int
2196 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2197                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2198                 const struct rte_eth_txconf *tx_conf)
2199 {
2200         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2201                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2202                                         0, dev->data->numa_node);
2203
2204         if (bd_tx_q == NULL)
2205                 return -1;
2206
2207         bd_tx_q->queue_id = tx_queue_id;
2208         bd_tx_q->dev_private = dev->data->dev_private;
2209
2210         bd_tx_q->nb_tx_desc = nb_tx_desc;
2211         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2212
2213         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2214
2215         return 0;
2216 }
2217
2218 static void
2219 bond_ethdev_rx_queue_release(void *queue)
2220 {
2221         if (queue == NULL)
2222                 return;
2223
2224         rte_free(queue);
2225 }
2226
2227 static void
2228 bond_ethdev_tx_queue_release(void *queue)
2229 {
2230         if (queue == NULL)
2231                 return;
2232
2233         rte_free(queue);
2234 }
2235
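/* Periodic alarm callback emulating LSC interrupts for slaves that cannot
 * generate them: poll each such slave's link state and, on a change, invoke
 * the same bond_ethdev_lsc_event_callback() a real interrupt would have
 * triggered, then re-arm the alarm. */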
2236 static void
2237 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2238 {
2239         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2240         struct bond_dev_private *internals;
2241
2242         /* Default value for polling slave found is true as we don't want to
2243          * disable the polling thread if we cannot get the lock */
2244         int i, polling_slave_found = 1;
2245
2246         if (cb_arg == NULL)
2247                 return;
2248
2249         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2250         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2251
2252         if (!bonded_ethdev->data->dev_started ||
2253                 !internals->link_status_polling_enabled)
2254                 return;
2255
2256         /* If device is currently being configured then don't check slaves'
2257          * link status; wait until next period */
2258         if (rte_spinlock_trylock(&internals->lock)) {
2259                 if (internals->slave_count > 0)
2260                         polling_slave_found = 0;
2261
2262                 for (i = 0; i < internals->slave_count; i++) {
2263                         if (!internals->slaves[i].link_status_poll_enabled)
2264                                 continue;
2265
2266                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2267                         polling_slave_found = 1;
2268
2269                         /* Update slave link status */
2270                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2271                                         internals->slaves[i].link_status_wait_to_complete);
2272
2273                         /* if link status has changed since last checked then call lsc
2274                          * event callback */
2275                         if (slave_ethdev->data->dev_link.link_status !=
2276                                         internals->slaves[i].last_link_status) {
2277                                 internals->slaves[i].last_link_status =
2278                                                 slave_ethdev->data->dev_link.link_status;
2279
2280                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2281                                                 RTE_ETH_EVENT_INTR_LSC,
2282                                                 &bonded_ethdev->data->port_id,
2283                                                 NULL);
2284                         }
2285                 }
2286                 rte_spinlock_unlock(&internals->lock);
2287         }
2288
2289         if (polling_slave_found)
2290                 /* Set alarm to continue monitoring link status of slave ethdev's */
2291                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2292                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2293 }
2294
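/* Aggregate the slaves' link state into the bonded device's link. The
 * reported speed is mode dependent: the minimum across slaves in broadcast
 * mode (every slave must keep up with the burst), the primary's speed in
 * active-backup, and the sum of the active slaves' speeds for the
 * load-sharing modes. */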
2295 static int
2296 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2297 {
2298         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2299
2300         struct bond_dev_private *bond_ctx;
2301         struct rte_eth_link slave_link;
2302
2303         uint32_t idx;
2304
2305         bond_ctx = ethdev->data->dev_private;
2306
2307         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2308
2309         if (ethdev->data->dev_started == 0 ||
2310                         bond_ctx->active_slave_count == 0) {
2311                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2312                 return 0;
2313         }
2314
2315         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2316
2317         if (wait_to_complete)
2318                 link_update = rte_eth_link_get;
2319         else
2320                 link_update = rte_eth_link_get_nowait;
2321
2322         switch (bond_ctx->mode) {
2323         case BONDING_MODE_BROADCAST:
2324                 /**
2325                  * Setting link speed to UINT32_MAX to ensure we pick up the
2326                  * value of the first active slave
2327                  */
2328                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2329
2330                 /**
2331                  * link speed is minimum value of all the slaves link speed as
2332                  * packet loss will occur on this slave if transmission at rates
2333                  * greater than this are attempted
2334                  */
2335                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2336                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2337
2338                         if (slave_link.link_speed <
2339                                         ethdev->data->dev_link.link_speed)
2340                                 ethdev->data->dev_link.link_speed =
2341                                                 slave_link.link_speed;
2342                 }
2343                 break;
2344         case BONDING_MODE_ACTIVE_BACKUP:
2345                 /* Current primary slave */
2346                 link_update(bond_ctx->current_primary_port, &slave_link);
2347
2348                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2349                 break;
2350         case BONDING_MODE_8023AD:
2351                 ethdev->data->dev_link.link_autoneg =
2352                                 bond_ctx->mode4.slave_link.link_autoneg;
2353                 ethdev->data->dev_link.link_duplex =
2354                                 bond_ctx->mode4.slave_link.link_duplex;
2355                 /* fall through to update link speed */
2356         case BONDING_MODE_ROUND_ROBIN:
2357         case BONDING_MODE_BALANCE:
2358         case BONDING_MODE_TLB:
2359         case BONDING_MODE_ALB:
2360         default:
2361                 /**
2362                  * In these modes the maximum theoretical link speed is the
2363                  * sum of all the slaves' link speeds
2364                  */
2365                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2366
2367                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2368                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2369
2370                         ethdev->data->dev_link.link_speed +=
2371                                         slave_link.link_speed;
2372                 }
2373         }
2374
2375
2376         return 0;
2377 }
2378
2379
2380 static int
2381 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2382 {
2383         struct bond_dev_private *internals = dev->data->dev_private;
2384         struct rte_eth_stats slave_stats;
2385         int i, j;
2386
2387         for (i = 0; i < internals->slave_count; i++) {
2388                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2389
2390                 stats->ipackets += slave_stats.ipackets;
2391                 stats->opackets += slave_stats.opackets;
2392                 stats->ibytes += slave_stats.ibytes;
2393                 stats->obytes += slave_stats.obytes;
2394                 stats->imissed += slave_stats.imissed;
2395                 stats->ierrors += slave_stats.ierrors;
2396                 stats->oerrors += slave_stats.oerrors;
2397                 stats->rx_nombuf += slave_stats.rx_nombuf;
2398
2399                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2400                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2401                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2402                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2403                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2404                         stats->q_errors[j] += slave_stats.q_errors[j];
2405                 }
2406
2407         }
2408
2409         return 0;
2410 }
2411
2412 static void
2413 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2414 {
2415         struct bond_dev_private *internals = dev->data->dev_private;
2416         int i;
2417
2418         for (i = 0; i < internals->slave_count; i++)
2419                 rte_eth_stats_reset(internals->slaves[i].port_id);
2420 }
2421
2422 static void
2423 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2424 {
2425         struct bond_dev_private *internals = eth_dev->data->dev_private;
2426         int i;
2427
2428         internals->promiscuous_en = 1;
2429
2430         switch (internals->mode) {
2431         /* Promiscuous mode is propagated to all slaves */
2432         case BONDING_MODE_ROUND_ROBIN:
2433         case BONDING_MODE_BALANCE:
2434         case BONDING_MODE_BROADCAST:
2435                 for (i = 0; i < internals->slave_count; i++)
2436                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2437                 break;
2438         /* In mode4 promiscuous mode is managed when slave is added/removed */
2439         case BONDING_MODE_8023AD:
2440                 break;
2441         /* Promiscuous mode is propagated only to primary slave */
2442         case BONDING_MODE_ACTIVE_BACKUP:
2443         case BONDING_MODE_TLB:
2444         case BONDING_MODE_ALB:
2445         default:
2446                 rte_eth_promiscuous_enable(internals->current_primary_port);
2447         }
2448 }
2449
2450 static void
2451 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2452 {
2453         struct bond_dev_private *internals = dev->data->dev_private;
2454         int i;
2455
2456         internals->promiscuous_en = 0;
2457
2458         switch (internals->mode) {
2459         /* Promiscuous mode is propagated to all slaves */
2460         case BONDING_MODE_ROUND_ROBIN:
2461         case BONDING_MODE_BALANCE:
2462         case BONDING_MODE_BROADCAST:
2463                 for (i = 0; i < internals->slave_count; i++)
2464                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2465                 break;
2466         /* In mode4 promiscuous mode is managed when slave is added/removed */
2467         case BONDING_MODE_8023AD:
2468                 break;
2469         /* Promiscuous mode is propagated only to primary slave */
2470         case BONDING_MODE_ACTIVE_BACKUP:
2471         case BONDING_MODE_TLB:
2472         case BONDING_MODE_ALB:
2473         default:
2474                 rte_eth_promiscuous_disable(internals->current_primary_port);
2475         }
2476 }
2477
2478 static void
2479 bond_ethdev_delayed_lsc_propagation(void *arg)
2480 {
2481         if (arg == NULL)
2482                 return;
2483
2484         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2485                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2486 }
2487
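/* LSC handler invoked on slave link changes (from real interrupts or the
 * polling monitor above): activate/deactivate the slave, re-elect the
 * primary port when needed and propagate the first link-up / last link-down
 * transition to the application, optionally deferred by the configured
 * up/down delay alarms. */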
2488 int
2489 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2490                 void *param, void *ret_param __rte_unused)
2491 {
2492         struct rte_eth_dev *bonded_eth_dev;
2493         struct bond_dev_private *internals;
2494         struct rte_eth_link link;
2495         int rc = -1;
2496
2497         int i, valid_slave = 0;
2498         uint8_t active_pos;
2499         uint8_t lsc_flag = 0;
2500
2501         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2502                 return rc;
2503
2504         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2505
2506         if (check_for_bonded_ethdev(bonded_eth_dev))
2507                 return rc;
2508
2509         internals = bonded_eth_dev->data->dev_private;
2510
2511         /* If the device isn't started don't handle interrupts */
2512         if (!bonded_eth_dev->data->dev_started)
2513                 return rc;
2514
2515         /* verify that port_id is a valid slave of bonded port */
2516         for (i = 0; i < internals->slave_count; i++) {
2517                 if (internals->slaves[i].port_id == port_id) {
2518                         valid_slave = 1;
2519                         break;
2520                 }
2521         }
2522
2523         if (!valid_slave)
2524                 return rc;
2525
2526         /* Synchronize lsc callback parallel calls either by real link event
2527          * from the slaves PMDs or by the bonding PMD itself.
2528          */
2529         rte_spinlock_lock(&internals->lsc_lock);
2530
2531         /* Search for port in active port list */
2532         active_pos = find_slave_by_id(internals->active_slaves,
2533                         internals->active_slave_count, port_id);
2534
2535         rte_eth_link_get_nowait(port_id, &link);
2536         if (link.link_status) {
2537                 if (active_pos < internals->active_slave_count)
2538                         goto link_update;
2539
2540                 /* if no active slave ports then set this port to be primary port */
2541                 if (internals->active_slave_count < 1) {
2542                         /* If first active slave, then change link status */
2543                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2544                         internals->current_primary_port = port_id;
2545                         lsc_flag = 1;
2546
2547                         mac_address_slaves_update(bonded_eth_dev);
2548                 }
2549
2550                 activate_slave(bonded_eth_dev, port_id);
2551
2552                 /* If user has defined the primary port then default to using it */
2553                 if (internals->user_defined_primary_port &&
2554                                 internals->primary_port == port_id)
2555                         bond_ethdev_primary_set(internals, port_id);
2556         } else {
2557                 if (active_pos == internals->active_slave_count)
2558                         goto link_update;
2559
2560                 /* Remove from active slave list */
2561                 deactivate_slave(bonded_eth_dev, port_id);
2562
2563                 if (internals->active_slave_count < 1)
2564                         lsc_flag = 1;
2565
2566                 /* Update primary id, take first active slave from list or if none
2567                  * available fall back to the configured primary port */
2568                 if (port_id == internals->current_primary_port) {
2569                         if (internals->active_slave_count > 0)
2570                                 bond_ethdev_primary_set(internals,
2571                                                 internals->active_slaves[0]);
2572                         else
2573                                 internals->current_primary_port = internals->primary_port;
2574                 }
2575         }
2576
2577 link_update:
2578         /**
2579          * Update bonded device link properties after any change to active
2580          * slaves
2581          */
2582         bond_ethdev_link_update(bonded_eth_dev, 0);
2583
2584         if (lsc_flag) {
2585                 /* Cancel any possible outstanding interrupts if delays are enabled */
2586                 if (internals->link_up_delay_ms > 0 ||
2587                         internals->link_down_delay_ms > 0)
2588                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2589                                         bonded_eth_dev);
2590
2591                 if (bonded_eth_dev->data->dev_link.link_status) {
2592                         if (internals->link_up_delay_ms > 0)
2593                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2594                                                 bond_ethdev_delayed_lsc_propagation,
2595                                                 (void *)bonded_eth_dev);
2596                         else
2597                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2598                                                 RTE_ETH_EVENT_INTR_LSC,
2599                                                 NULL, NULL);
2600
2601                 } else {
2602                         if (internals->link_down_delay_ms > 0)
2603                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2604                                                 bond_ethdev_delayed_lsc_propagation,
2605                                                 (void *)bonded_eth_dev);
2606                         else
2607                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2608                                                 RTE_ETH_EVENT_INTR_LSC,
2609                                                 NULL, NULL);
2610                 }
2611         }
2612
2613         rte_spinlock_unlock(&internals->lsc_lock);
2614
2615         return rc;
2616 }
2617
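/* Apply a new RSS redirection table: cache the user-supplied entries
 * (replicated to fill the whole table) so they can be replayed onto slaves
 * as they are added, then push the table to every current slave at that
 * slave's own RETA size. */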
2618 static int
2619 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2620                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2621 {
2622         unsigned i, j;
2623         int result = 0;
2624         int slave_reta_size;
2625         unsigned reta_count;
2626         struct bond_dev_private *internals = dev->data->dev_private;
2627
2628         if (reta_size != internals->reta_size)
2629                 return -EINVAL;
2630
2631         /* Copy RETA table */
2632         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2633
2634         for (i = 0; i < reta_count; i++) {
2635                 internals->reta_conf[i].mask = reta_conf[i].mask;
2636                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2637                         if ((reta_conf[i].mask >> j) & 0x01)
2638                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2639         }
2640
2641         /* Fill rest of array */
2642         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2643                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2644                                 sizeof(internals->reta_conf[0]) * reta_count);
2645
2646         /* Propagate RETA over slaves */
2647         for (i = 0; i < internals->slave_count; i++) {
2648                 slave_reta_size = internals->slaves[i].reta_size;
2649                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2650                                 &internals->reta_conf[0], slave_reta_size);
2651                 if (result < 0)
2652                         return result;
2653         }
2654
2655         return 0;
2656 }
2657
2658 static int
2659 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2660                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2661 {
2662         int i, j;
2663         struct bond_dev_private *internals = dev->data->dev_private;
2664
2665         if (reta_size != internals->reta_size)
2666                 return -EINVAL;
2667
2668          /* Copy RETA table */
2669         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2670                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2671                         if ((reta_conf[i].mask >> j) & 0x01)
2672                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2673
2674         return 0;
2675 }
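
/*
 * Usage sketch (application side, illustrative only): an update must supply
 * exactly the bonded device's reta_size entries and is mirrored to every
 * slave by the handler above. Assuming the bonded port reports a 128-entry
 * indirection table and is configured with two RX queues:
 *
 *	struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
 *	unsigned int k;
 *
 *	for (k = 0; k < 128; k++) {
 *		reta_conf[k / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
 *		reta_conf[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *				k % 2;	 (alternate between queue 0 and queue 1)
 *	}
 *	rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 128);
 *	rte_eth_dev_rss_reta_query(bonded_port_id, reta_conf, 128);
 */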
2676
2677 static int
2678 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2679                 struct rte_eth_rss_conf *rss_conf)
2680 {
2681         int i, result = 0;
2682         struct bond_dev_private *internals = dev->data->dev_private;
2683         struct rte_eth_rss_conf bond_rss_conf;
2684
2685         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2686
2687         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2688
2689         if (bond_rss_conf.rss_hf != 0)
2690                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2691
2692         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2693                         sizeof(internals->rss_key)) {
2694                 if (bond_rss_conf.rss_key_len == 0)
2695                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2696                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2697                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2698                                 internals->rss_key_len);
2699         }
2700
2701         for (i = 0; i < internals->slave_count; i++) {
2702                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2703                                 &bond_rss_conf);
2704                 if (result < 0)
2705                         return result;
2706         }
2707
2708         return 0;
2709 }
2710
2711 static int
2712 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2713                 struct rte_eth_rss_conf *rss_conf)
2714 {
2715         struct bond_dev_private *internals = dev->data->dev_private;
2716
2717         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2718         rss_conf->rss_key_len = internals->rss_key_len;
2719         if (rss_conf->rss_key)
2720                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2721
2722         return 0;
2723 }
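
/*
 * Usage sketch (application side, illustrative only): the requested rss_hf
 * is masked with the intersection of the slaves' RSS capabilities before
 * being applied, so the value read back may be narrower than requested:
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,	 (keep the current key)
 *		.rss_key_len = 0,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
 *	rte_eth_dev_rss_hash_conf_get(bonded_port_id, &conf);
 */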
2724
2725 const struct eth_dev_ops default_dev_ops = {
2726         .dev_start            = bond_ethdev_start,
2727         .dev_stop             = bond_ethdev_stop,
2728         .dev_close            = bond_ethdev_close,
2729         .dev_configure        = bond_ethdev_configure,
2730         .dev_infos_get        = bond_ethdev_info,
2731         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2732         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2733         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2734         .rx_queue_release     = bond_ethdev_rx_queue_release,
2735         .tx_queue_release     = bond_ethdev_tx_queue_release,
2736         .link_update          = bond_ethdev_link_update,
2737         .stats_get            = bond_ethdev_stats_get,
2738         .stats_reset          = bond_ethdev_stats_reset,
2739         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2740         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2741         .reta_update          = bond_ethdev_rss_reta_update,
2742         .reta_query           = bond_ethdev_rss_reta_query,
2743         .rss_hash_update      = bond_ethdev_rss_hash_update,
2744         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2745 };
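
/*
 * These ops are reached through the generic ethdev API, so a bonded port is
 * brought up like any physical port. Minimal sketch, assuming a previously
 * created mbuf pool "mb_pool" (hypothetical name) and one queue per
 * direction:
 *
 *	struct rte_eth_conf port_conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_NONE },
 *	};
 *
 *	rte_eth_dev_configure(bonded_port_id, 1, 1, &port_conf);
 *	rte_eth_rx_queue_setup(bonded_port_id, 0, 128, rte_socket_id(),
 *			NULL, mb_pool);
 *	rte_eth_tx_queue_setup(bonded_port_id, 0, 512, rte_socket_id(), NULL);
 *	rte_eth_dev_start(bonded_port_id);
 */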
2746
2747 static int
2748 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2749 {
2750         const char *name = rte_vdev_device_name(dev);
2751         uint8_t socket_id = dev->device.numa_node;
2752         struct bond_dev_private *internals = NULL;
2753         struct rte_eth_dev *eth_dev = NULL;
2754         uint32_t vlan_filter_bmp_size;
2755
2756         /* now do all data allocation - for eth_dev structure, dummy pci driver
2757          * and internal (private) data
2758          */
2759
2760         /* reserve an ethdev entry */
2761         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2762         if (eth_dev == NULL) {
2763                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2764                 goto err;
2765         }
2766
2767         internals = eth_dev->data->dev_private;
2768         eth_dev->data->nb_rx_queues = (uint16_t)1;
2769         eth_dev->data->nb_tx_queues = (uint16_t)1;
2770
2771         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2772                         socket_id);
2773         if (eth_dev->data->mac_addrs == NULL) {
2774                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2775                 goto err;
2776         }
2777
2778         eth_dev->dev_ops = &default_dev_ops;
2779         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2780
2781         rte_spinlock_init(&internals->lock);
2782         rte_spinlock_init(&internals->lsc_lock);
2783
2784         internals->port_id = eth_dev->data->port_id;
2785         internals->mode = BONDING_MODE_INVALID;
2786         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2787         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2788         internals->xmit_hash = xmit_l2_hash;
2789         internals->user_defined_mac = 0;
2790
2791         internals->link_status_polling_enabled = 0;
2792
2793         internals->link_status_polling_interval_ms =
2794                 DEFAULT_POLLING_INTERVAL_10_MS;
2795         internals->link_down_delay_ms = 0;
2796         internals->link_up_delay_ms = 0;
2797
2798         internals->slave_count = 0;
2799         internals->active_slave_count = 0;
2800         internals->rx_offload_capa = 0;
2801         internals->tx_offload_capa = 0;
2802         internals->candidate_max_rx_pktlen = 0;
2803         internals->max_rx_pktlen = 0;
2804
2805         /* Initially allow any RSS offload type to be chosen */
2806         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2807
2808         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2809         memset(internals->slaves, 0, sizeof(internals->slaves));
2810
2811         /* Set mode 4 default configuration */
2812         bond_mode_8023ad_setup(eth_dev, NULL);
2813         if (bond_ethdev_mode_set(eth_dev, mode)) {
2814                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2815                                  eth_dev->data->port_id, mode);
2816                 goto err;
2817         }
2818
2819         vlan_filter_bmp_size =
2820                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2821         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2822                                                    RTE_CACHE_LINE_SIZE);
2823         if (internals->vlan_filter_bmpmem == NULL) {
2824                 RTE_BOND_LOG(ERR,
2825                              "Failed to allocate vlan bitmap for bonded device %u\n",
2826                              eth_dev->data->port_id);
2827                 goto err;
2828         }
2829
2830         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2831                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2832         if (internals->vlan_filter_bmp == NULL) {
2833                 RTE_BOND_LOG(ERR,
2834                              "Failed to init vlan bitmap for bonded device %u\n",
2835                              eth_dev->data->port_id);
2836                 rte_free(internals->vlan_filter_bmpmem);
2837                 goto err;
2838         }
2839
2840         return eth_dev->data->port_id;
2841
2842 err:
2843         rte_free(internals);
2844         if (eth_dev != NULL) {
2845                 rte_free(eth_dev->data->mac_addrs);
2846                 rte_eth_dev_release_port(eth_dev);
2847         }
2848         return -1;
2849 }
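
/*
 * bond_alloc() is also reached through the public bonding API. An
 * equivalent programmatic creation (illustrative sketch, error handling
 * abbreviated):
 *
 *	int bonded_port_id;
 *
 *	bonded_port_id = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	if (bonded_port_id < 0)
 *		rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
 */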
2850
2851 static int
2852 bond_probe(struct rte_vdev_device *dev)
2853 {
2854         const char *name;
2855         struct bond_dev_private *internals;
2856         struct rte_kvargs *kvlist;
2857         uint8_t bonding_mode, socket_id;
2858         int  arg_count, port_id;
2859         uint8_t agg_mode;
2860
2861         if (!dev)
2862                 return -EINVAL;
2863
2864         name = rte_vdev_device_name(dev);
2865         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2866
2867         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2868                 pmd_bond_init_valid_arguments);
2869         if (kvlist == NULL)
2870                 return -1;
2871
2872         /* Parse link bonding mode */
2873         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2874                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2875                                 &bond_ethdev_parse_slave_mode_kvarg,
2876                                 &bonding_mode) != 0) {
2877                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2878                                         name);
2879                         goto parse_error;
2880                 }
2881         } else {
2882                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded"
2883                                 " device %s\n", name);
2884                 goto parse_error;
2885         }
2886
2887         /* Parse socket id to create bonding device on */
2888         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2889         if (arg_count == 1) {
2890                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2891                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2892                                 != 0) {
2893                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2894                                         "bonded device %s\n", name);
2895                         goto parse_error;
2896                 }
2897         } else if (arg_count > 1) {
2898                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2899                                 "bonded device %s\n", name);
2900                 goto parse_error;
2901         } else {
2902                 socket_id = rte_socket_id();
2903         }
2904
2905         dev->device.numa_node = socket_id;
2906
2907         /* Create link bonding eth device */
2908         port_id = bond_alloc(dev, bonding_mode);
2909         if (port_id < 0) {
2910                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u"
2911                                 " on socket %u.\n", name, bonding_mode, socket_id);
2912                 goto parse_error;
2913         }
2914         internals = rte_eth_devices[port_id].data->dev_private;
2915         internals->kvlist = kvlist;
2916
2917
2918         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2919                 if (rte_kvargs_process(kvlist,
2920                                 PMD_BOND_AGG_MODE_KVARG,
2921                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
2922                                 &agg_mode) != 0) {
2923                         RTE_LOG(ERR, EAL,
2924                                         "Failed to parse agg selection mode for bonded device %s\n",
2925                                         name);
2926                         goto parse_error;
2927                 }
2928
2929                 if (internals->mode == BONDING_MODE_8023AD)
2930                         rte_eth_bond_8023ad_agg_selection_set(port_id,
2931                                         agg_mode);
2932         } else {
2933                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2934         }
2935
2936         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2937                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2938         return 0;
2939
2940 parse_error:
2941         rte_kvargs_free(kvlist);
2942
2943         return -1;
2944 }
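
/*
 * Example devargs accepted by bond_probe() (illustrative values; slave
 * identifiers are device names such as PCI addresses):
 *
 *	--vdev 'net_bonding0,mode=2,slave=0000:00:08.0,slave=0000:00:09.0,xmit_policy=l34'
 *
 * Only "mode" must be given here; keys such as "slave" and "primary" are
 * parsed later in bond_ethdev_configure(), once all other ports exist.
 */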
2945
2946 static int
2947 bond_remove(struct rte_vdev_device *dev)
2948 {
2949         struct rte_eth_dev *eth_dev;
2950         struct bond_dev_private *internals;
2951         const char *name;
2952
2953         if (!dev)
2954                 return -EINVAL;
2955
2956         name = rte_vdev_device_name(dev);
2957         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2958
2959         /* now free all data allocation - for eth_dev structure,
2960          * dummy pci driver and internal (private) data
2961          */
2962
2963         /* find an ethdev entry */
2964         eth_dev = rte_eth_dev_allocated(name);
2965         if (eth_dev == NULL)
2966                 return -ENODEV;
2967
2968         RTE_ASSERT(eth_dev->device == &dev->device);
2969
2970         internals = eth_dev->data->dev_private;
2971         if (internals->slave_count != 0)
2972                 return -EBUSY;
2973
2974         if (eth_dev->data->dev_started == 1) {
2975                 bond_ethdev_stop(eth_dev);
2976                 bond_ethdev_close(eth_dev);
2977         }
2978
2979         eth_dev->dev_ops = NULL;
2980         eth_dev->rx_pkt_burst = NULL;
2981         eth_dev->tx_pkt_burst = NULL;
2982
2984         /* Try to release the mempool used in mode 6. If the bonded device
2985          * is not in mode 6, freeing a NULL mempool pointer is harmless.
2986          */
2987         rte_mempool_free(internals->mode6.mempool);
2988         rte_bitmap_free(internals->vlan_filter_bmp);
2989         rte_free(internals->vlan_filter_bmpmem);
2990         rte_free(eth_dev->data->dev_private);
2991         rte_free(eth_dev->data->mac_addrs);
2992
2993         rte_eth_dev_release_port(eth_dev);
2994
2995         return 0;
2996 }
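
/*
 * bond_remove() is normally reached through device detach. A possible
 * application-side trigger (sketch; all slaves must have been removed
 * first, otherwise -EBUSY is returned above):
 *
 *	char devname[RTE_ETH_NAME_MAX_LEN];
 *
 *	rte_eth_dev_detach(bonded_port_id, devname);
 */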
2997
2998 /* This resolves the slave port IDs only after all the other pdevs and vdevs
2999  * have been allocated. */
3000 static int
3001 bond_ethdev_configure(struct rte_eth_dev *dev)
3002 {
3003         const char *name = dev->device->name;
3004         struct bond_dev_private *internals = dev->data->dev_private;
3005         struct rte_kvargs *kvlist = internals->kvlist;
3006         int arg_count;
3007         uint16_t port_id = dev - rte_eth_devices;
3008         uint8_t agg_mode;
3009
3010         static const uint8_t default_rss_key[40] = {
3011                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3012                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3013                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3014                 0xBE, 0xAC, 0x01, 0xFA
3015         };
3016
3017         unsigned i, j;
3018
3019         /* If RSS is enabled, fill table and key with default values */
3020         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3021                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3022                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3023                 memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
3024
3025                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3026                         internals->reta_conf[i].mask = ~0LL;
3027                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3028                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3029                 }
3030         }
3031
3032         /* set the max_rx_pktlen */
3033         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3034
3035         /*
3036          * If no kvlist, it means this bonded device has been created
3037          * through the bonding API.
3038          */
3039         if (!kvlist)
3040                 return 0;
3041
3042         /* Parse MAC address for bonded device */
3043         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3044         if (arg_count == 1) {
3045                 struct ether_addr bond_mac;
3046
3047                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3048                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3049                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3050                                         name);
3051                         return -1;
3052                 }
3053
3054                 /* Set MAC address */
3055                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3056                         RTE_LOG(ERR, EAL,
3057                                         "Failed to set mac address on bonded device %s\n",
3058                                         name);
3059                         return -1;
3060                 }
3061         } else if (arg_count > 1) {
3062                 RTE_LOG(ERR, EAL,
3063                                 "MAC address can be specified only once for bonded device %s\n",
3064                                 name);
3065                 return -1;
3066         }
3067
3068         /* Parse/set balance mode transmit policy */
3069         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3070         if (arg_count == 1) {
3071                 uint8_t xmit_policy;
3072
3073                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3074                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3075                                                 0) {
3076                         RTE_LOG(INFO, EAL,
3077                                         "Invalid xmit policy specified for bonded device %s\n",
3078                                         name);
3079                         return -1;
3080                 }
3081
3082                 /* Set balance mode transmit policy */
3083                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3084                         RTE_LOG(ERR, EAL,
3085                                         "Failed to set balance xmit policy on bonded device %s\n",
3086                                         name);
3087                         return -1;
3088                 }
3089         } else if (arg_count > 1) {
3090                 RTE_LOG(ERR, EAL,
3091                                 "Transmit policy can be specified only once for bonded device"
3092                                 " %s\n", name);
3093                 return -1;
3094         }
3095
3096         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3097                 if (rte_kvargs_process(kvlist,
3098                                 PMD_BOND_AGG_MODE_KVARG,
3099                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3100                                 &agg_mode) != 0) {
3101                         RTE_LOG(ERR, EAL,
3102                                         "Failed to parse agg selection mode for bonded device %s\n",
3103                                         name);
3104                 }
3105                 if (internals->mode == BONDING_MODE_8023AD)
3106                                 rte_eth_bond_8023ad_agg_selection_set(port_id,
3107                                                 agg_mode);
3108         }
3109
3110         /* Parse/add slave ports to bonded device */
3111         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3112                 struct bond_ethdev_slave_ports slave_ports;
3113                 unsigned i;
3114
3115                 memset(&slave_ports, 0, sizeof(slave_ports));
3116
3117                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3118                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3119                         RTE_LOG(ERR, EAL,
3120                                         "Failed to parse slave ports for bonded device %s\n",
3121                                         name);
3122                         return -1;
3123                 }
3124
3125                 for (i = 0; i < slave_ports.slave_count; i++) {
3126                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3127                                 RTE_LOG(ERR, EAL,
3128                                                 "Failed to add port %d as slave to bonded device %s\n",
3129                                                 slave_ports.slaves[i], name);
3130                         }
3131                 }
3132
3133         } else {
3134                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3135                 return -1;
3136         }
3137
3138         /* Parse/set primary slave port id*/
3139         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3140         if (arg_count == 1) {
3141                 uint16_t primary_slave_port_id;
3142
3143                 if (rte_kvargs_process(kvlist,
3144                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3145                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3146                                 &primary_slave_port_id) < 0) {
3147                         RTE_LOG(INFO, EAL,
3148                                         "Invalid primary slave port id specified for bonded device"
3149                                         " %s\n", name);
3150                         return -1;
3151                 }
3152
3153                 /* Set primary slave port id */
3154                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3155                                 != 0) {
3156                         RTE_LOG(ERR, EAL,
3157                                         "Failed to set primary slave port %d on bonded device %s\n",
3158                                         primary_slave_port_id, name);
3159                         return -1;
3160                 }
3161         } else if (arg_count > 1) {
3162                 RTE_LOG(INFO, EAL,
3163                                 "Primary slave can be specified only once for bonded device"
3164                                 " %s\n", name);
3165                 return -1;
3166         }
3167
3168         /* Parse link status monitor polling interval */
3169         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3170         if (arg_count == 1) {
3171                 uint32_t lsc_poll_interval_ms;
3172
3173                 if (rte_kvargs_process(kvlist,
3174                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3175                                 &bond_ethdev_parse_time_ms_kvarg,
3176                                 &lsc_poll_interval_ms) < 0) {
3177                         RTE_LOG(INFO, EAL,
3178                                         "Invalid lsc polling interval value specified for bonded"
3179                                         " device %s\n", name);
3180                         return -1;
3181                 }
3182
3183                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3184                                 != 0) {
3185                         RTE_LOG(ERR, EAL,
3186                                         "Failed to set lsc monitor polling interval (%u ms) on"
3187                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3188                         return -1;
3189                 }
3190         } else if (arg_count > 1) {
3191                 RTE_LOG(INFO, EAL,
3192                                 "LSC polling interval can be specified only once for bonded"
3193                                 " device %s\n", name);
3194                 return -1;
3195         }
3196
3197         /* Parse link up interrupt propagation delay */
3198         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3199         if (arg_count == 1) {
3200                 uint32_t link_up_delay_ms;
3201
3202                 if (rte_kvargs_process(kvlist,
3203                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3204                                 &bond_ethdev_parse_time_ms_kvarg,
3205                                 &link_up_delay_ms) < 0) {
3206                         RTE_LOG(INFO, EAL,
3207                                         "Invalid link up propagation delay value specified for"
3208                                         " bonded device %s\n", name);
3209                         return -1;
3210                 }
3211
3212                 /* Set link up propagation delay */
3213                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3214                                 != 0) {
3215                         RTE_LOG(ERR, EAL,
3216                                         "Failed to set link up propagation delay (%u ms) on bonded"
3217                                         " device %s\n", link_up_delay_ms, name);
3218                         return -1;
3219                 }
3220         } else if (arg_count > 1) {
3221                 RTE_LOG(INFO, EAL,
3222                                 "Link up propagation delay can be specified only once for"
3223                                 " bonded device %s\n", name);
3224                 return -1;
3225         }
3226
3227         /* Parse link down interrupt propagation delay */
3228         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3229         if (arg_count == 1) {
3230                 uint32_t link_down_delay_ms;
3231
3232                 if (rte_kvargs_process(kvlist,
3233                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3234                                 &bond_ethdev_parse_time_ms_kvarg,
3235                                 &link_down_delay_ms) < 0) {
3236                         RTE_LOG(INFO, EAL,
3237                                         "Invalid link down propagation delay value specified for"
3238                                         " bonded device %s\n", name);
3239                         return -1;
3240                 }
3241
3242                 /* Set link down propagation delay */
3243                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3244                                 != 0) {
3245                         RTE_LOG(ERR, EAL,
3246                                         "Failed to set link down propagation delay (%u ms) on"
3247                                         " bonded device %s\n", link_down_delay_ms, name);
3248                         return -1;
3249                 }
3250         } else if (arg_count > 1) {
3251                 RTE_LOG(INFO, EAL,
3252                                 "Link down propagation delay can be specified only once for"
3253                                 " bonded device %s\n", name);
3254                 return -1;
3255         }
3256
3257         return 0;
3258 }
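
/*
 * The kvargs handled above map onto the public bonding API, so a device
 * created with rte_eth_bond_create() can be configured programmatically
 * instead (illustrative values):
 *
 *	rte_eth_bond_slave_add(bonded_port_id, slave_port_id);
 *	rte_eth_bond_primary_set(bonded_port_id, slave_port_id);
 *	rte_eth_bond_xmit_policy_set(bonded_port_id, BALANCE_XMIT_POLICY_LAYER34);
 *	rte_eth_bond_link_monitoring_set(bonded_port_id, 100);	 (ms)
 *	rte_eth_bond_link_up_prop_delay_set(bonded_port_id, 500);
 *	rte_eth_bond_link_down_prop_delay_set(bonded_port_id, 500);
 */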
3259
3260 struct rte_vdev_driver pmd_bond_drv = {
3261         .probe = bond_probe,
3262         .remove = bond_remove,
3263 };
3264
3265 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3266 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3267
3268 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3269         "slave=<ifc> "
3270         "primary=<ifc> "
3271         "mode=[0-6] "
3272         "xmit_policy=[l2 | l23 | l34] "
3273         "agg_mode=[count | stable | bandwidth] "
3274         "socket_id=<int> "
3275         "mac=<mac addr> "
3276         "lsc_poll_period_ms=<int> "
3277         "up_delay=<int> "
3278         "down_delay=<int>");