/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

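/*
 * Return the size in bytes of the VLAN tagging (if any) at the start of the
 * L2 payload, advancing *proto past at most two tags (single tag or QinQ).
 * For example, a QinQ frame with ethertypes 0x8100/0x8100/0x0800 yields an
 * offset of 2 * sizeof(struct vlan_hdr) = 8 and leaves *proto holding the
 * big-endian IPv4 ethertype.
 */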
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

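/* Active-backup Rx: traffic is only received on the current primary slave;
 * the backup slaves stay idle until a failover promotes one of them. */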
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

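/* A frame is treated as a slow protocol (LACP or marker) packet only if it
 * carries no VLAN tag and its ethertype is ETHER_TYPE_SLOW with a LACP or
 * marker subtype. */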
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

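/* The flow pattern below matches every Ethernet frame whose ethertype is
 * ETHER_TYPE_SLOW (LACP and marker PDUs) so that slaves which support
 * rte_flow can steer those frames to a dedicated control queue instead of
 * mixing them into the data path. */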
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that every slave in the bond can accept the 802.3ad flow rule
	 * and the extra dedicated queues */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* Positions in the slaves array, not port IDs */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total number of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slave mbuf arrays with the packets to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

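/* Mode 4 (802.3ad) Rx without dedicated queues: data packets and LACPDUs
 * share the same queues, so each received burst is filtered in software.
 * Slow protocol frames are diverted to the mode 4 state machine, packets
 * received while the slave is not collecting are dropped, and unicast
 * frames addressed elsewhere are dropped unless promiscuous mode is on. */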
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If the packet is classified as anything beyond pure
			 * L2 Ethernet, it cannot be a slow protocol frame;
			 * skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow packet,
			 * if the slave is not in collecting state, or if the bonding
			 * interface is not in promiscuous mode and the destination
			 * address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update the RX or TX packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
		RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

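/* Round-robin Tx: packets are spread one at a time across the active
 * slaves, resuming from where the previous burst left off. */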
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slave mbuf arrays with the packets to be sent on each slave */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* Increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

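/* The transmit hash helpers below XOR-fold header fields into a single
 * value which the xmit_l2/l23/l34 policies reduce modulo the slave count.
 * Because src and dst fields are XORed together, the hash is symmetric:
 * A->B and B->A traffic produce the same value, so both directions of a
 * flow are carried by the same slave. */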
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* There is no L4 header in a fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

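/* TLB (mode 5) keeps the slaves ordered by estimated remaining transmit
 * bandwidth. The alarm callback below recomputes that order every
 * REORDER_PERIOD_MS so bond_ethdev_tx_burst_tlb() can fill the least
 * loaded slave first. */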
void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

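/* Estimate the unused transmit capacity of a slave. link_speed is reported
 * in Mbps, so link_speed * 1000000 / 8 is the line rate in bytes/second;
 * the quotient/remainder pair lets bandwidth_cmp() rank slaves without
 * floating point arithmetic. */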
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

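/* ALB (mode 6) Tx: outgoing ARP packets are rewritten so that each client
 * learns a different slave MAC, spreading receive traffic across slaves,
 * while all non-ARP traffic falls back to the TLB policy above. */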
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slave mbuf arrays with the packets to be sent on each slave */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

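/* Mode 4 (802.3ad) Tx without dedicated queues: slow protocol frames queued
 * by the LACP state machine on each slave's tx_ring are dequeued here and
 * placed at the front of that slave's burst, so control traffic shares the
 * data queues. */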
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* Positions in the slaves array, not port IDs */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate room for additional slow protocol packets in 802.3ad mode */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total number of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Number of slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate slave mbuf arrays with the packets to be sent on each slave */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

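/* Broadcast Tx: every packet is sent on every active slave. The mbuf
 * reference count is incremented once per additional slave so that each
 * transmit path owns, and eventually frees, its own reference. */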
1340 static uint16_t
1341 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1342                 uint16_t nb_pkts)
1343 {
1344         struct bond_dev_private *internals;
1345         struct bond_tx_queue *bd_tx_q;
1346
1347         uint8_t tx_failed_flag = 0, num_of_slaves;
1348         uint16_t slaves[RTE_MAX_ETHPORTS];
1349
1350         uint16_t max_nb_of_tx_pkts = 0;
1351
1352         int slave_tx_total[RTE_MAX_ETHPORTS];
1353         int i, most_successful_tx_slave = -1;
1354
1355         bd_tx_q = (struct bond_tx_queue *)queue;
1356         internals = bd_tx_q->dev_private;
1357
1358         /* Copy slave list to protect against slave up/down changes during tx
1359          * bursting */
1360         num_of_slaves = internals->active_slave_count;
1361         memcpy(slaves, internals->active_slaves,
1362                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1363
1364         if (num_of_slaves < 1)
1365                 return 0;
1366
1367         /* Increment mbuf reference counts so each slave transmit path owns a reference */
1368         for (i = 0; i < nb_pkts; i++)
1369                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1370
1371         /* Transmit burst on each active slave */
1372         for (i = 0; i < num_of_slaves; i++) {
1373                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1374                                         bufs, nb_pkts);
1375
1376                 if (unlikely(slave_tx_total[i] < nb_pkts))
1377                         tx_failed_flag = 1;
1378
1379                 /* Record the packet count and slave index for the slave which
1380                  * transmitted the maximum number of packets */
1381                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1382                         max_nb_of_tx_pkts = slave_tx_total[i];
1383                         most_successful_tx_slave = i;
1384                 }
1385         }
1386
1387         /* if slaves fail to transmit packets from burst, the calling application
1388          * is not expected to know about multiple references to packets so we must
1389          * handle failures of all packets except those of the most successful slave
1390          */
1391         if (unlikely(tx_failed_flag))
1392                 for (i = 0; i < num_of_slaves; i++)
1393                         if (i != most_successful_tx_slave)
1394                                 while (slave_tx_total[i] < nb_pkts)
1395                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1396
1397         return max_nb_of_tx_pkts;
1398 }
1399
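/* Record the link properties (speed/duplex/autoneg) that the bonded device
 * advertises. In mode 4 the first slave's properties are captured so that all
 * later slaves can be validated against them by link_properties_valid(). */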
1400 void
1401 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1402 {
1403         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1404
1405         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1406                 /**
1407                  * If in mode 4 then save the link properties of the first
1408                  * slave; all subsequent slaves must match these properties
1409                  */
1410                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1411
1412                 bond_link->link_autoneg = slave_link->link_autoneg;
1413                 bond_link->link_duplex = slave_link->link_duplex;
1414                 bond_link->link_speed = slave_link->link_speed;
1415         } else {
1416                 /**
1417                  * In any other mode the link properties are set to default
1418                  * values of AUTONEG/DUPLEX
1419                  */
1420                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1421                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1422         }
1423 }
1424
1425 int
1426 link_properties_valid(struct rte_eth_dev *ethdev,
1427                 struct rte_eth_link *slave_link)
1428 {
1429         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1430
1431         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1432                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1433
1434                 if (bond_link->link_duplex != slave_link->link_duplex ||
1435                         bond_link->link_autoneg != slave_link->link_autoneg ||
1436                         bond_link->link_speed != slave_link->link_speed)
1437                         return -1;
1438         }
1439
1440         return 0;
1441 }
1442
1443 int
1444 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1445 {
1446         struct ether_addr *mac_addr;
1447
1448         if (eth_dev == NULL) {
1449                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1450                 return -1;
1451         }
1452
1453         if (dst_mac_addr == NULL) {
1454                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1455                 return -1;
1456         }
1457
1458         mac_addr = eth_dev->data->mac_addrs;
1459
1460         ether_addr_copy(mac_addr, dst_mac_addr);
1461         return 0;
1462 }
1463
1464 int
1465 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1466 {
1467         struct ether_addr *mac_addr;
1468
1469         if (eth_dev == NULL) {
1470                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1471                 return -1;
1472         }
1473
1474         if (new_mac_addr == NULL) {
1475                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1476                 return -1;
1477         }
1478
1479         mac_addr = eth_dev->data->mac_addrs;
1480
1481         /* If the new MAC is different from the current MAC then update */
1482         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1483                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1484
1485         return 0;
1486 }
1487
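/* Apply the bonding MAC policy to all slaves. In round robin, balance and
 * broadcast modes every slave shares the bonded MAC; mode 4 delegates to the
 * 802.3ad state machine; in the remaining modes only the current primary
 * carries the bonded MAC while the other slaves keep their persisted
 * addresses. */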
1488 int
1489 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1490 {
1491         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1492         int i;
1493
1494         /* Update slave devices MAC addresses */
1495         if (internals->slave_count < 1)
1496                 return -1;
1497
1498         switch (internals->mode) {
1499         case BONDING_MODE_ROUND_ROBIN:
1500         case BONDING_MODE_BALANCE:
1501         case BONDING_MODE_BROADCAST:
1502                 for (i = 0; i < internals->slave_count; i++) {
1503                         if (rte_eth_dev_default_mac_addr_set(
1504                                         internals->slaves[i].port_id,
1505                                         bonded_eth_dev->data->mac_addrs)) {
1506                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1507                                                 internals->slaves[i].port_id);
1508                                 return -1;
1509                         }
1510                 }
1511                 break;
1512         case BONDING_MODE_8023AD:
1513                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1514                 break;
1515         case BONDING_MODE_ACTIVE_BACKUP:
1516         case BONDING_MODE_TLB:
1517         case BONDING_MODE_ALB:
1518         default:
1519                 for (i = 0; i < internals->slave_count; i++) {
1520                         if (internals->slaves[i].port_id ==
1521                                         internals->current_primary_port) {
1522                                 if (rte_eth_dev_default_mac_addr_set(
1523                                                 internals->current_primary_port,
1524                                                 bonded_eth_dev->data->mac_addrs)) {
1525                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1526                                                         internals->current_primary_port);
1527                                         return -1;
1528                                 }
1529                         } else {
1530                                 if (rte_eth_dev_default_mac_addr_set(
1531                                                 internals->slaves[i].port_id,
1532                                                 &internals->slaves[i].persisted_mac_addr)) {
1533                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1534                                                         internals->slaves[i].port_id);
1535                                         return -1;
1536                                 }
1537                         }
1538                 }
1539         }
1540
1541         return 0;
1542 }
1543
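/* Install the rx/tx burst handlers that implement the requested bonding mode.
 * Applications normally select the mode through the public API rather than
 * calling this directly, e.g. (hypothetical port id):
 *
 *     rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_8023AD);
 */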
1544 int
1545 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1546 {
1547         struct bond_dev_private *internals;
1548
1549         internals = eth_dev->data->dev_private;
1550
1551         switch (mode) {
1552         case BONDING_MODE_ROUND_ROBIN:
1553                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1554                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1555                 break;
1556         case BONDING_MODE_ACTIVE_BACKUP:
1557                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1558                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1559                 break;
1560         case BONDING_MODE_BALANCE:
1561                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1562                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1563                 break;
1564         case BONDING_MODE_BROADCAST:
1565                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1566                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567                 break;
1568         case BONDING_MODE_8023AD:
1569                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1570                         return -1;
1571
1572                 if (internals->mode4.dedicated_queues.enabled == 0) {
1573                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1574                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1575                         RTE_LOG(WARNING, PMD,
1576                                 "Using mode 4, it is necessary to do TX burst "
1577                                 "and RX burst at least every 100ms.\n");
1578                 } else {
1579                         /* Use flow director's optimization */
1580                         eth_dev->rx_pkt_burst =
1581                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1582                         eth_dev->tx_pkt_burst =
1583                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1584                 }
1585                 break;
1586         case BONDING_MODE_TLB:
1587                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1588                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1589                 break;
1590         case BONDING_MODE_ALB:
1591                 if (bond_mode_alb_enable(eth_dev) != 0)
1592                         return -1;
1593
1594                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1595                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1596                 break;
1597         default:
1598                 return -1;
1599         }
1600
1601         internals->mode = mode;
1602
1603         return 0;
1604 }
1605
1606
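/* Set up the per-slave resources used for 802.3ad slow (control) traffic: a
 * dedicated mempool for LACPDUs and, when dedicated queues are enabled, an
 * extra rx/tx queue pair on the slave reserved for control frames. */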
1607 static int
1608 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1609                 struct rte_eth_dev *slave_eth_dev)
1610 {
1611         int errval = 0;
1612         struct bond_dev_private *internals = (struct bond_dev_private *)
1613                 bonded_eth_dev->data->dev_private;
1614         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1615
1616         if (port->slow_pool == NULL) {
1617                 char mem_name[256];
1618                 int slave_id = slave_eth_dev->data->port_id;
1619
1620                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1621                                 slave_id);
1622                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1623                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1624                         slave_eth_dev->data->numa_node);
1625
1626                 /* Any memory allocation failure in initialization is critical because
1627                  * resources can't be freed, so reinitialization is impossible. */
1628                 if (port->slow_pool == NULL) {
1629                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1630                                 slave_id, mem_name, rte_strerror(rte_errno));
1631                 }
1632         }
1633
1634         if (internals->mode4.dedicated_queues.enabled == 1) {
1635                 /* Configure slow Rx queue */
1636
1637                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1638                                 internals->mode4.dedicated_queues.rx_qid, 128,
1639                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1640                                 NULL, port->slow_pool);
1641                 if (errval != 0) {
1642                         RTE_BOND_LOG(ERR,
1643                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1644                                         slave_eth_dev->data->port_id,
1645                                         internals->mode4.dedicated_queues.rx_qid,
1646                                         errval);
1647                         return errval;
1648                 }
1649
1650                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1651                                 internals->mode4.dedicated_queues.tx_qid, 512,
1652                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1653                                 NULL);
1654                 if (errval != 0) {
1655                         RTE_BOND_LOG(ERR,
1656                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1657                                 slave_eth_dev->data->port_id,
1658                                 internals->mode4.dedicated_queues.tx_qid,
1659                                 errval);
1660                         return errval;
1661                 }
1662         }
1663         return 0;
1664 }
1665
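/* (Re)configure a slave so it mirrors the bonded device: stop it, propagate
 * RSS/VLAN settings, configure the same number of rx/tx queues (plus the
 * dedicated slow queues in LACP mode), then restart it and sync the RETA. */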
1666 int
1667 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1668                 struct rte_eth_dev *slave_eth_dev)
1669 {
1670         struct bond_rx_queue *bd_rx_q;
1671         struct bond_tx_queue *bd_tx_q;
1672         uint16_t nb_rx_queues;
1673         uint16_t nb_tx_queues;
1674
1675         int errval;
1676         uint16_t q_id;
1677         struct rte_flow_error flow_error;
1678
1679         struct bond_dev_private *internals = (struct bond_dev_private *)
1680                 bonded_eth_dev->data->dev_private;
1681
1682         /* Stop slave */
1683         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1684
1685         /* Enable interrupts on slave device if supported */
1686         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1687                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1688
1689         /* If RSS is enabled for bonding, try to enable it for slaves  */
1690         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1691                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1692                                 != 0) {
1693                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1694                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1695                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1696                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1697                 } else {
1698                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1699                 }
1700
1701                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1702                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1703                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1704                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1705         }
1706
1707         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1708                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1709
1710         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1711         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1712
1713         if (internals->mode == BONDING_MODE_8023AD) {
1714                 if (internals->mode4.dedicated_queues.enabled == 1) {
1715                         nb_rx_queues++;
1716                         nb_tx_queues++;
1717                 }
1718         }
1719
1720         /* Configure device */
1721         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1722                         nb_rx_queues, nb_tx_queues,
1723                         &(slave_eth_dev->data->dev_conf));
1724         if (errval != 0) {
1725                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1726                                 slave_eth_dev->data->port_id, errval);
1727                 return errval;
1728         }
1729
1730         /* Setup Rx Queues */
1731         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1732                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1733
1734                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1735                                 bd_rx_q->nb_rx_desc,
1736                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1737                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1738                 if (errval != 0) {
1739                         RTE_BOND_LOG(ERR,
1740                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1741                                         slave_eth_dev->data->port_id, q_id, errval);
1742                         return errval;
1743                 }
1744         }
1745
1746         /* Setup Tx Queues */
1747         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1748                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1749
1750                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1751                                 bd_tx_q->nb_tx_desc,
1752                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753                                 &bd_tx_q->tx_conf);
1754                 if (errval != 0) {
1755                         RTE_BOND_LOG(ERR,
1756                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1757                                 slave_eth_dev->data->port_id, q_id, errval);
1758                         return errval;
1759                 }
1760         }
1761
1762         if (internals->mode == BONDING_MODE_8023AD &&
1763                         internals->mode4.dedicated_queues.enabled == 1) {
1764                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1765                 if (errval != 0)
1766                         return errval;
1767
1768                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1769                                 slave_eth_dev->data->port_id) != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "bond_ethdev_8023ad_flow_verify: port=%d, flow verify failed",
1772                                 slave_eth_dev->data->port_id);
1773                         return -1;
1774                 }
1775
1776                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1777                         rte_flow_destroy(slave_eth_dev->data->port_id,
1778                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1779                                         &flow_error);
1780
1781                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1782                                 slave_eth_dev->data->port_id);
1783         }
1784
1785         /* Start device */
1786         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1787         if (errval != 0) {
1788                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1789                                 slave_eth_dev->data->port_id, errval);
1790                 return -1;
1791         }
1792
1793         /* If RSS is enabled for bonding, synchronize RETA */
1794         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1795                 int i;
1796                 struct bond_dev_private *internals;
1797
1798                 internals = bonded_eth_dev->data->dev_private;
1799
1800                 for (i = 0; i < internals->slave_count; i++) {
1801                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1802                                 errval = rte_eth_dev_rss_reta_update(
1803                                                 slave_eth_dev->data->port_id,
1804                                                 &internals->reta_conf[0],
1805                                                 internals->slaves[i].reta_size);
1806                                 if (errval != 0) {
1807                                         RTE_LOG(WARNING, PMD,
1808                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1809                                                         " RSS Configuration for bonding may be inconsistent.\n",
1810                                                         slave_eth_dev->data->port_id, errval);
1811                                 }
1812                                 break;
1813                         }
1814                 }
1815         }
1816
1817         /* If lsc interrupt is set, check initial slave's link status */
1818         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1820                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1821                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1822                         NULL);
1823         }
1824
1825         return 0;
1826 }
1827
1828 void
1829 slave_remove(struct bond_dev_private *internals,
1830                 struct rte_eth_dev *slave_eth_dev)
1831 {
1832         uint8_t i;
1833
1834         for (i = 0; i < internals->slave_count; i++)
1835                 if (internals->slaves[i].port_id ==
1836                                 slave_eth_dev->data->port_id)
1837                         break;
1838
1839         if (i < (internals->slave_count - 1))
1840                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1841                                 sizeof(internals->slaves[0]) *
1842                                 (internals->slave_count - i - 1));
1843
1844         internals->slave_count--;
1845
1846         /* force reconfiguration of slave interfaces */
1847         _rte_eth_dev_reset(slave_eth_dev);
1848 }
1849
1850 static void
1851 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1852
1853 void
1854 slave_add(struct bond_dev_private *internals,
1855                 struct rte_eth_dev *slave_eth_dev)
1856 {
1857         struct bond_slave_details *slave_details =
1858                         &internals->slaves[internals->slave_count];
1859
1860         slave_details->port_id = slave_eth_dev->data->port_id;
1861         slave_details->last_link_status = 0;
1862
1863         /* Mark slave devices that don't support interrupts so we can
1864          * compensate when we start the bond
1865          */
1866         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1867                 slave_details->link_status_poll_enabled = 1;
1868         }
1869
1870         slave_details->link_status_wait_to_complete = 0;
1871         /* Save slave's current MAC address so it can be restored when removed from the bond */
1872         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1873                         sizeof(struct ether_addr));
1874 }
1875
1876 void
1877 bond_ethdev_primary_set(struct bond_dev_private *internals,
1878                 uint16_t slave_port_id)
1879 {
1880         int i;
1881
1882         if (internals->active_slave_count < 1)
1883                 internals->current_primary_port = slave_port_id;
1884         else
1885                 /* Search bonded device slave ports for new proposed primary port */
1886                 for (i = 0; i < internals->active_slave_count; i++) {
1887                         if (internals->active_slaves[i] == slave_port_id)
1888                                 internals->current_primary_port = slave_port_id;
1889                 }
1890 }
1891
1892 static void
1893 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1894
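/* Start the bonded device: derive the bond MAC from the primary slave when
 * the user has not set one, push MACs and promiscuous state to the slaves,
 * reconfigure and start every slave, and arm link-status polling for slaves
 * that cannot deliver LSC interrupts. */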
1895 static int
1896 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1897 {
1898         struct bond_dev_private *internals;
1899         int i;
1900
1901         /* slave eth dev will be started by bonded device */
1902         if (check_for_bonded_ethdev(eth_dev)) {
1903                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1904                                 eth_dev->data->port_id);
1905                 return -1;
1906         }
1907
1908         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1909         eth_dev->data->dev_started = 1;
1910
1911         internals = eth_dev->data->dev_private;
1912
1913         if (internals->slave_count == 0) {
1914                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1915                 goto out_err;
1916         }
1917
1918         if (internals->user_defined_mac == 0) {
1919                 struct ether_addr *new_mac_addr = NULL;
1920
1921                 for (i = 0; i < internals->slave_count; i++)
1922                         if (internals->slaves[i].port_id == internals->primary_port)
1923                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1924
1925                 if (new_mac_addr == NULL)
1926                         goto out_err;
1927
1928                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1929                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1930                                         eth_dev->data->port_id);
1931                         goto out_err;
1932                 }
1933         }
1934
1935         /* Update all slave devices MACs*/
1936         if (mac_address_slaves_update(eth_dev) != 0)
1937                 goto out_err;
1938
1939         /* If bonded device is configured in promiscuous mode then re-apply config */
1940         if (internals->promiscuous_en)
1941                 bond_ethdev_promiscuous_enable(eth_dev);
1942
1943         if (internals->mode == BONDING_MODE_8023AD) {
1944                 if (internals->mode4.dedicated_queues.enabled == 1) {
1945                         internals->mode4.dedicated_queues.rx_qid =
1946                                         eth_dev->data->nb_rx_queues;
1947                         internals->mode4.dedicated_queues.tx_qid =
1948                                         eth_dev->data->nb_tx_queues;
1949                 }
1950         }
1951
1952
1953         /* Reconfigure each slave device if starting bonded device */
1954         for (i = 0; i < internals->slave_count; i++) {
1955                 struct rte_eth_dev *slave_ethdev =
1956                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1957                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1958                         RTE_BOND_LOG(ERR,
1959                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1960                                 eth_dev->data->port_id,
1961                                 internals->slaves[i].port_id);
1962                         goto out_err;
1963                 }
1964                 /* We will need to poll for link status if any slave doesn't
1965                  * support interrupts
1966                  */
1967                 if (internals->slaves[i].link_status_poll_enabled)
1968                         internals->link_status_polling_enabled = 1;
1969         }
1970
1971         /* start polling if needed */
1972         if (internals->link_status_polling_enabled) {
1973                 rte_eal_alarm_set(
1974                         internals->link_status_polling_interval_ms * 1000,
1975                         bond_ethdev_slave_link_status_change_monitor,
1976                         (void *)&rte_eth_devices[internals->port_id]);
1977         }
1978
1979         if (internals->user_defined_primary_port)
1980                 bond_ethdev_primary_set(internals, internals->primary_port);
1981
1982         if (internals->mode == BONDING_MODE_8023AD)
1983                 bond_mode_8023ad_start(eth_dev);
1984
1985         if (internals->mode == BONDING_MODE_TLB ||
1986                         internals->mode == BONDING_MODE_ALB)
1987                 bond_tlb_enable(internals);
1988
1989         return 0;
1990
1991 out_err:
1992         eth_dev->data->dev_started = 0;
1993         return -1;
1994 }
1995
1996 static void
1997 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1998 {
1999         uint8_t i;
2000
2001         if (dev->data->rx_queues != NULL) {
2002                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2003                         rte_free(dev->data->rx_queues[i]);
2004                         dev->data->rx_queues[i] = NULL;
2005                 }
2006                 dev->data->nb_rx_queues = 0;
2007         }
2008
2009         if (dev->data->tx_queues != NULL) {
2010                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2011                         rte_free(dev->data->tx_queues[i]);
2012                         dev->data->tx_queues[i] = NULL;
2013                 }
2014                 dev->data->nb_tx_queues = 0;
2015         }
2016 }
2017
2018 void
2019 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2020 {
2021         struct bond_dev_private *internals = eth_dev->data->dev_private;
2022         uint8_t i;
2023
2024         if (internals->mode == BONDING_MODE_8023AD) {
2025                 struct port *port;
2026                 void *pkt = NULL;
2027
2028                 bond_mode_8023ad_stop(eth_dev);
2029
2030                 /* Discard all messages to/from mode 4 state machines */
2031                 for (i = 0; i < internals->active_slave_count; i++) {
2032                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2033
2034                         RTE_ASSERT(port->rx_ring != NULL);
2035                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2036                                 rte_pktmbuf_free(pkt);
2037
2038                         RTE_ASSERT(port->tx_ring != NULL);
2039                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2040                                 rte_pktmbuf_free(pkt);
2041                 }
2042         }
2043
2044         if (internals->mode == BONDING_MODE_TLB ||
2045                         internals->mode == BONDING_MODE_ALB) {
2046                 bond_tlb_disable(internals);
2047                 for (i = 0; i < internals->active_slave_count; i++)
2048                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2049         }
2050
2051         internals->active_slave_count = 0;
2052         internals->link_status_polling_enabled = 0;
2053         for (i = 0; i < internals->slave_count; i++)
2054                 internals->slaves[i].last_link_status = 0;
2055
2056         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2057         eth_dev->data->dev_started = 0;
2058 }
2059
2060 void
2061 bond_ethdev_close(struct rte_eth_dev *dev)
2062 {
2063         struct bond_dev_private *internals = dev->data->dev_private;
2064         uint8_t bond_port_id = internals->port_id;
2065         int skipped = 0;
2066
2067         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2068         while (internals->slave_count != skipped) {
2069                 uint16_t port_id = internals->slaves[skipped].port_id;
2070
2071                 rte_eth_dev_stop(port_id);
2072
2073                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2074                         RTE_BOND_LOG(ERR,
2075                                 "Failed to remove port %d from bonded device %s",
2076                                 port_id, dev->device->name);
2077                         skipped++;
2078                 }
2079         }
2080         bond_ethdev_free_queues(dev);
2081         rte_bitmap_reset(internals->vlan_filter_bmp);
2082 }
2083
2084 /* forward declaration */
2085 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2086
2087 static void
2088 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2089 {
2090         struct bond_dev_private *internals = dev->data->dev_private;
2091
2092         uint16_t max_nb_rx_queues = UINT16_MAX;
2093         uint16_t max_nb_tx_queues = UINT16_MAX;
2094
2095         dev_info->max_mac_addrs = 1;
2096
2097         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2098                         internals->candidate_max_rx_pktlen :
2099                         ETHER_MAX_JUMBO_FRAME_LEN;
2100
2101         /* Max number of tx/rx queues that the bonded device can support is
2102          * the minimum of the values reported by the slaves, as every slave
2103          * must be able to supply the number of queues configured on the bond.
2104          */
2105         if (internals->slave_count > 0) {
2106                 struct rte_eth_dev_info slave_info;
2107                 uint8_t idx;
2108
2109                 for (idx = 0; idx < internals->slave_count; idx++) {
2110                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2111                                         &slave_info);
2112
2113                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2114                                 max_nb_rx_queues = slave_info.max_rx_queues;
2115
2116                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2117                                 max_nb_tx_queues = slave_info.max_tx_queues;
2118                 }
2119         }
2120
2121         dev_info->max_rx_queues = max_nb_rx_queues;
2122         dev_info->max_tx_queues = max_nb_tx_queues;
2123
2124         /**
2125          * If dedicated HW queues are enabled for the bonded device in LACP
2126          * mode then the maximum number of data path queues is reduced by 1.
2127          */
2128         if (internals->mode == BONDING_MODE_8023AD &&
2129                 internals->mode4.dedicated_queues.enabled == 1) {
2130                 dev_info->max_rx_queues--;
2131                 dev_info->max_tx_queues--;
2132         }
2133
2134         dev_info->min_rx_bufsize = 0;
2135
2136         dev_info->rx_offload_capa = internals->rx_offload_capa;
2137         dev_info->tx_offload_capa = internals->tx_offload_capa;
2138         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2139
2140         dev_info->reta_size = internals->reta_size;
2141 }
2142
2143 static int
2144 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2145 {
2146         int res;
2147         uint16_t i;
2148         struct bond_dev_private *internals = dev->data->dev_private;
2149
2150         /* don't do this while a slave is being added */
2151         rte_spinlock_lock(&internals->lock);
2152
2153         if (on)
2154                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2155         else
2156                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2157
2158         for (i = 0; i < internals->slave_count; i++) {
2159                 uint16_t port_id = internals->slaves[i].port_id;
2160
2161                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2162                 if (res == -ENOTSUP)
2163                         RTE_LOG(WARNING, PMD,
2164                                 "Setting VLAN filter on slave port %u not supported.\n",
2165                                 port_id);
2166         }
2167
2168         rte_spinlock_unlock(&internals->lock);
2169         return 0;
2170 }
2171
2172 static int
2173 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2174                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2175                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2176 {
2177         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2178                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2179                                         0, dev->data->numa_node);
2180         if (bd_rx_q == NULL)
2181                 return -1;
2182
2183         bd_rx_q->queue_id = rx_queue_id;
2184         bd_rx_q->dev_private = dev->data->dev_private;
2185
2186         bd_rx_q->nb_rx_desc = nb_rx_desc;
2187
2188         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2189         bd_rx_q->mb_pool = mb_pool;
2190
2191         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2192
2193         return 0;
2194 }
2195
2196 static int
2197 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2198                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2199                 const struct rte_eth_txconf *tx_conf)
2200 {
2201         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2202                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2203                                         0, dev->data->numa_node);
2204
2205         if (bd_tx_q == NULL)
2206                 return -1;
2207
2208         bd_tx_q->queue_id = tx_queue_id;
2209         bd_tx_q->dev_private = dev->data->dev_private;
2210
2211         bd_tx_q->nb_tx_desc = nb_tx_desc;
2212         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2213
2214         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2215
2216         return 0;
2217 }
2218
2219 static void
2220 bond_ethdev_rx_queue_release(void *queue)
2221 {
2222         if (queue == NULL)
2223                 return;
2224
2225         rte_free(queue);
2226 }
2227
2228 static void
2229 bond_ethdev_tx_queue_release(void *queue)
2230 {
2231         if (queue == NULL)
2232                 return;
2233
2234         rte_free(queue);
2235 }
2236
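/* Alarm callback that polls the link status of slaves which do not support
 * LSC interrupts. It re-arms itself while at least one such slave exists and
 * the bonded device is started; slave state is only touched when the device
 * lock can be taken, otherwise the check is deferred to the next period. */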
2237 static void
2238 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2239 {
2240         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2241         struct bond_dev_private *internals;
2242
2243         /* Default polling_slave_found to true so that polling is not
2244          * disabled if we cannot take the lock */
2245         int i, polling_slave_found = 1;
2246
2247         if (cb_arg == NULL)
2248                 return;
2249
2250         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2251         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2252
2253         if (!bonded_ethdev->data->dev_started ||
2254                 !internals->link_status_polling_enabled)
2255                 return;
2256
2257         /* If device is currently being configured then don't check slaves link
2258          * status, wait until next period */
2259         if (rte_spinlock_trylock(&internals->lock)) {
2260                 if (internals->slave_count > 0)
2261                         polling_slave_found = 0;
2262
2263                 for (i = 0; i < internals->slave_count; i++) {
2264                         if (!internals->slaves[i].link_status_poll_enabled)
2265                                 continue;
2266
2267                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2268                         polling_slave_found = 1;
2269
2270                         /* Update slave link status */
2271                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2272                                         internals->slaves[i].link_status_wait_to_complete);
2273
2274                         /* if link status has changed since last checked then call lsc
2275                          * event callback */
2276                         if (slave_ethdev->data->dev_link.link_status !=
2277                                         internals->slaves[i].last_link_status) {
2278                                 internals->slaves[i].last_link_status =
2279                                                 slave_ethdev->data->dev_link.link_status;
2280
2281                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2282                                                 RTE_ETH_EVENT_INTR_LSC,
2283                                                 &bonded_ethdev->data->port_id,
2284                                                 NULL);
2285                         }
2286                 }
2287                 rte_spinlock_unlock(&internals->lock);
2288         }
2289
2290         if (polling_slave_found)
2291                 /* Set alarm to continue monitoring link status of slave ethdevs */
2292                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2293                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2294 }
2295
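/* Aggregate the bonded device link state from the active slaves. The reported
 * speed is mode dependent: broadcast uses the minimum slave speed, active
 * backup uses the primary's speed, and the remaining modes report the sum of
 * the active slaves' speeds as the theoretical maximum. */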
2296 static int
2297 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2298 {
2299         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2300
2301         struct bond_dev_private *bond_ctx;
2302         struct rte_eth_link slave_link;
2303
2304         uint32_t idx;
2305
2306         bond_ctx = ethdev->data->dev_private;
2307
2308         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2309
2310         if (ethdev->data->dev_started == 0 ||
2311                         bond_ctx->active_slave_count == 0) {
2312                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2313                 return 0;
2314         }
2315
2316         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2317
2318         if (wait_to_complete)
2319                 link_update = rte_eth_link_get;
2320         else
2321                 link_update = rte_eth_link_get_nowait;
2322
2323         switch (bond_ctx->mode) {
2324         case BONDING_MODE_BROADCAST:
2325                 /**
2326                  * Setting link speed to UINT32_MAX to ensure we pick up the
2327                  * value of the first active slave
2328                  */
2329                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2330
2331                 /**
2332                  * Link speed is the minimum of all the slaves' link speeds, as
2333                  * packet loss will occur on a slave if transmission at rates
2334                  * greater than its link speed is attempted
2335                  */
2336                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2337                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2338
2339                         if (slave_link.link_speed <
2340                                         ethdev->data->dev_link.link_speed)
2341                                 ethdev->data->dev_link.link_speed =
2342                                                 slave_link.link_speed;
2343                 }
2344                 break;
2345         case BONDING_MODE_ACTIVE_BACKUP:
2346                 /* Current primary slave */
2347                 link_update(bond_ctx->current_primary_port, &slave_link);
2348
2349                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2350                 break;
2351         case BONDING_MODE_8023AD:
2352                 ethdev->data->dev_link.link_autoneg =
2353                                 bond_ctx->mode4.slave_link.link_autoneg;
2354                 ethdev->data->dev_link.link_duplex =
2355                                 bond_ctx->mode4.slave_link.link_duplex;
2356                 /* fall through to update link speed */
2357         case BONDING_MODE_ROUND_ROBIN:
2358         case BONDING_MODE_BALANCE:
2359         case BONDING_MODE_TLB:
2360         case BONDING_MODE_ALB:
2361         default:
2362                 /**
2363                  * In these modes the maximum theoretical link speed is the sum
2364                  * of all the slaves' link speeds
2365                  */
2366                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2367
2368                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2369                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2370
2371                         ethdev->data->dev_link.link_speed +=
2372                                         slave_link.link_speed;
2373                 }
2374         }
2375
2376
2377         return 0;
2378 }
2379
2380
2381 static int
2382 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2383 {
2384         struct bond_dev_private *internals = dev->data->dev_private;
2385         struct rte_eth_stats slave_stats;
2386         int i, j;
2387
2388         for (i = 0; i < internals->slave_count; i++) {
2389                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2390
2391                 stats->ipackets += slave_stats.ipackets;
2392                 stats->opackets += slave_stats.opackets;
2393                 stats->ibytes += slave_stats.ibytes;
2394                 stats->obytes += slave_stats.obytes;
2395                 stats->imissed += slave_stats.imissed;
2396                 stats->ierrors += slave_stats.ierrors;
2397                 stats->oerrors += slave_stats.oerrors;
2398                 stats->rx_nombuf += slave_stats.rx_nombuf;
2399
2400                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2401                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2402                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2403                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2404                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2405                         stats->q_errors[j] += slave_stats.q_errors[j];
2406                 }
2407
2408         }
2409
2410         return 0;
2411 }
2412
2413 static void
2414 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2415 {
2416         struct bond_dev_private *internals = dev->data->dev_private;
2417         int i;
2418
2419         for (i = 0; i < internals->slave_count; i++)
2420                 rte_eth_stats_reset(internals->slaves[i].port_id);
2421 }
2422
2423 static void
2424 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2425 {
2426         struct bond_dev_private *internals = eth_dev->data->dev_private;
2427         int i;
2428
2429         internals->promiscuous_en = 1;
2430
2431         switch (internals->mode) {
2432         /* Promiscuous mode is propagated to all slaves */
2433         case BONDING_MODE_ROUND_ROBIN:
2434         case BONDING_MODE_BALANCE:
2435         case BONDING_MODE_BROADCAST:
2436                 for (i = 0; i < internals->slave_count; i++)
2437                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2438                 break;
2439         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2440         case BONDING_MODE_8023AD:
2441                 break;
2442         /* Promiscuous mode is propagated only to primary slave */
2443         case BONDING_MODE_ACTIVE_BACKUP:
2444         case BONDING_MODE_TLB:
2445         case BONDING_MODE_ALB:
2446         default:
2447                 rte_eth_promiscuous_enable(internals->current_primary_port);
2448         }
2449 }
2450
2451 static void
2452 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2453 {
2454         struct bond_dev_private *internals = dev->data->dev_private;
2455         int i;
2456
2457         internals->promiscuous_en = 0;
2458
2459         switch (internals->mode) {
2460         /* Promiscuous mode is propagated to all slaves */
2461         case BONDING_MODE_ROUND_ROBIN:
2462         case BONDING_MODE_BALANCE:
2463         case BONDING_MODE_BROADCAST:
2464                 for (i = 0; i < internals->slave_count; i++)
2465                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2466                 break;
2467         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2468         case BONDING_MODE_8023AD:
2469                 break;
2470         /* Promiscuous mode is propagated only to primary slave */
2471         case BONDING_MODE_ACTIVE_BACKUP:
2472         case BONDING_MODE_TLB:
2473         case BONDING_MODE_ALB:
2474         default:
2475                 rte_eth_promiscuous_disable(internals->current_primary_port);
2476         }
2477 }
2478
2479 static void
2480 bond_ethdev_delayed_lsc_propagation(void *arg)
2481 {
2482         if (arg == NULL)
2483                 return;
2484
2485         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2486                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2487 }
2488
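/* LSC handler invoked when a slave's link changes. It activates/deactivates
 * the slave, elects a new primary when needed, refreshes the bonded link
 * properties and propagates the event to the application, optionally delayed
 * by the configured link up/down delay. */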
2489 int
2490 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2491                 void *param, void *ret_param __rte_unused)
2492 {
2493         struct rte_eth_dev *bonded_eth_dev;
2494         struct bond_dev_private *internals;
2495         struct rte_eth_link link;
2496         int rc = -1;
2497
2498         int i, valid_slave = 0;
2499         uint8_t active_pos;
2500         uint8_t lsc_flag = 0;
2501
2502         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2503                 return rc;
2504
2505         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2506
2507         if (check_for_bonded_ethdev(bonded_eth_dev))
2508                 return rc;
2509
2510         internals = bonded_eth_dev->data->dev_private;
2511
2512         /* If the device isn't started don't handle interrupts */
2513         if (!bonded_eth_dev->data->dev_started)
2514                 return rc;
2515
2516         /* verify that port_id is a valid slave of bonded port */
2517         for (i = 0; i < internals->slave_count; i++) {
2518                 if (internals->slaves[i].port_id == port_id) {
2519                         valid_slave = 1;
2520                         break;
2521                 }
2522         }
2523
2524         if (!valid_slave)
2525                 return rc;
2526
2527         /* Synchronize lsc callback parallel calls either by real link event
2528          * from the slaves PMDs or by the bonding PMD itself.
2529          */
2530         rte_spinlock_lock(&internals->lsc_lock);
2531
2532         /* Search for port in active port list */
2533         active_pos = find_slave_by_id(internals->active_slaves,
2534                         internals->active_slave_count, port_id);
2535
2536         rte_eth_link_get_nowait(port_id, &link);
2537         if (link.link_status) {
2538                 if (active_pos < internals->active_slave_count) {
2539                         rte_spinlock_unlock(&internals->lsc_lock);
2540                         return rc;
2541                 }
2542
2543                 /* if no active slave ports then set this port to be primary port */
2544                 if (internals->active_slave_count < 1) {
2545                         /* If first active slave, then change link status */
2546                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2547                         internals->current_primary_port = port_id;
2548                         lsc_flag = 1;
2549
2550                         mac_address_slaves_update(bonded_eth_dev);
2551                 }
2552
2553                 activate_slave(bonded_eth_dev, port_id);
2554
2555                 /* If user has defined the primary port then default to using it */
2556                 if (internals->user_defined_primary_port &&
2557                                 internals->primary_port == port_id)
2558                         bond_ethdev_primary_set(internals, port_id);
2559         } else {
2560                 if (active_pos == internals->active_slave_count) {
2561                         rte_spinlock_unlock(&internals->lsc_lock);
2562                         return rc;
2563                 }
2564
2565                 /* Remove from active slave list */
2566                 deactivate_slave(bonded_eth_dev, port_id);
2567
2568                 if (internals->active_slave_count < 1)
2569                         lsc_flag = 1;
2570
2571                 /* Update primary id: take the first active slave from the list,
2572                  * or fall back to the configured primary port if none are active */
2573                 if (port_id == internals->current_primary_port) {
2574                         if (internals->active_slave_count > 0)
2575                                 bond_ethdev_primary_set(internals,
2576                                                 internals->active_slaves[0]);
2577                         else
2578                                 internals->current_primary_port = internals->primary_port;
2579                 }
2580         }
2581
2582         /**
2583          * Update bonded device link properties after any change to active
2584          * slaves
2585          */
2586         bond_ethdev_link_update(bonded_eth_dev, 0);
2587
2588         if (lsc_flag) {
2589                 /* Cancel any possible outstanding interrupts if delays are enabled */
2590                 if (internals->link_up_delay_ms > 0 ||
2591                         internals->link_down_delay_ms > 0)
2592                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2593                                         bonded_eth_dev);
2594
2595                 if (bonded_eth_dev->data->dev_link.link_status) {
2596                         if (internals->link_up_delay_ms > 0)
2597                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2598                                                 bond_ethdev_delayed_lsc_propagation,
2599                                                 (void *)bonded_eth_dev);
2600                         else
2601                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2602                                                 RTE_ETH_EVENT_INTR_LSC,
2603                                                 NULL, NULL);
2604
2605                 } else {
2606                         if (internals->link_down_delay_ms > 0)
2607                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2608                                                 bond_ethdev_delayed_lsc_propagation,
2609                                                 (void *)bonded_eth_dev);
2610                         else
2611                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2612                                                 RTE_ETH_EVENT_INTR_LSC,
2613                                                 NULL, NULL);
2614                 }
2615         }
2616
2617         rte_spinlock_unlock(&internals->lsc_lock);
2618
2619         return 0;
2620 }
2621
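/* Update the bonded RETA and propagate it to every slave. The caller's table
 * must match the bonded reta_size exactly; it is replicated across the whole
 * internal table so slaves with larger tables receive a consistent pattern. */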
static int
bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	unsigned i, j;
	int result = 0;
	int slave_reta_size;
	unsigned reta_count;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	reta_count = reta_size / RTE_RETA_GROUP_SIZE;

	for (i = 0; i < reta_count; i++) {
		internals->reta_conf[i].mask = reta_conf[i].mask;
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
	}

	/* Fill rest of array */
	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
				sizeof(internals->reta_conf[0]) * reta_count);

	/* Propagate RETA over slaves */
	for (i = 0; i < internals->slave_count; i++) {
		slave_reta_size = internals->slaves[i].reta_size;
		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
				&internals->reta_conf[0], slave_reta_size);
		if (result < 0)
			return result;
	}

	return 0;
}
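
/*
 * Query the bonded device's RSS redirection table: entries selected by
 * each group mask are copied from the locally cached table; the slaves
 * are not consulted.
 */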
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}
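
/*
 * Update the RSS hash configuration: mask the requested hash functions
 * against the offloads every slave supports, cache the hash key when one
 * is supplied and fits, then propagate the configuration to all slaves.
 */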
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		/* A zero key length selects the default 40-byte key */
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
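
/*
 * Report the bonded device's current RSS configuration: the active hash
 * functions and, if the caller supplies a buffer, the cached hash key.
 */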
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}
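
/* ethdev operations exposed by the bonding PMD */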
const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
};
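
/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry and
 * its private data, install the bonding dev_ops, seed the private state
 * with defaults, apply the requested bonding mode and set up the VLAN
 * filter bitmap. Returns the new port id on success, -1 on failure.
 */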
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for the eth_dev structure and the
	 * internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);
	rte_spinlock_init(&internals->lsc_lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow to choose any offload type */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
				eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u\n",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u\n",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}
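
/*
 * Probe entry point for the bonding vdev driver: parse the device kvargs
 * (bonding mode and optional socket id), create the bonded ethdev via
 * bond_alloc() and apply the 802.3ad aggregator selection mode if one
 * was given.
 */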
static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;
	uint8_t agg_mode;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u "
				"on socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);

	return -1;
}
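
/*
 * Remove entry point for the bonding vdev driver: refuse to tear down a
 * device that still has slaves attached, stop and close it if running,
 * then release the private resources and the ethdev entry.
 */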
static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all data allocations - the eth_dev structure and the
	 * internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	/* Try to release the mempool used in mode 6. If the bonded device
	 * is not in mode 6, freeing a NULL pointer is harmless.
	 */
	rte_mempool_free(internals->mode6.mempool);
	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}

/* This resolves the slave port ids once all the other pdevs and vdevs
 * have been allocated.
 */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key, 40);

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * If there is no kvlist, this bonded device was created through the
	 * bonding API rather than from vdev arguments, so there is nothing
	 * further to parse; see the sketch below.
	 */
	if (!kvlist)
		return 0;
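
	/*
	 * Illustrative sketch of that API-based path (an assumption for
	 * clarity, not code executed here; "net_bonding0" and the slave
	 * port id are hypothetical):
	 *
	 *     int bond_port = rte_eth_bond_create("net_bonding0", mode, 0);
	 *     if (bond_port >= 0)
	 *             rte_eth_bond_slave_add(bond_port, slave_port_id);
	 *
	 * A device created that way has no kvlist and returns just above.
	 */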

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set the primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set the link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set the link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}

struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");
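
/*
 * Illustrative usage (a sketch, not driver code): the kvargs above can be
 * combined on the EAL command line, for example
 *
 *   --vdev 'net_bonding0,mode=1,slave=0000:00:04.0,slave=0000:00:05.0,primary=0000:00:04.0'
 *
 * The device name and PCI addresses here are hypothetical placeholders.
 */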