/* drivers/net/bonding/rte_eth_bond_pmd.c (from DPDK 17.11.1) */
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

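/*
 * Skip over up to two stacked VLAN tags (single-tagged and QinQ frames).
 * On return *proto holds the inner ethertype and the returned offset is
 * the number of VLAN-header bytes between the Ethernet header and the L3
 * header, so that (char *)(eth_hdr + 1) + get_vlan_offset(eth_hdr, &proto)
 * points at the start of the IP header.
 */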
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset into *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

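/*
 * A frame is treated as a slow-protocol (LACP/marker) packet only when it
 * carries no VLAN tag and its ethertype is ETHER_TYPE_SLOW (0x8809) with a
 * LACP or marker subtype; the mode 4 RX path uses this predicate to pull
 * such frames out of the datapath and hand them to the 802.3ad state
 * machine instead of delivering them to the application.
 */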
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

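/*
 * The pattern above matches ingress frames whose ethertype equals
 * ETHER_TYPE_SLOW exactly (the mask covers only the type field). Paired
 * with a QUEUE action, it lets slave hardware steer LACP traffic to a
 * dedicated RX queue, so the fast RX/TX paths never need to inspect
 * packets for slow frames.
 */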
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities don't allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that every slave in the bonding device supports the flow
	 * rule needed for hardware filtering of slow frames */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

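/*
 * Mode 4 RX path used when slow frames are filtered to a dedicated queue
 * in hardware: a plain round-robin over the active slaves, resuming from
 * the slave after the one that ended the previous burst so that no slave
 * is starved when the burst fills up early.
 */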
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t i, idx;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	/* Wrap the stored index so the next burst starts on a valid slave */
	internals->active_slave = (slave_count > 0) ? idx % slave_count : 0;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total number of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate the per-slave mbuf arrays with the packets to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

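/*
 * Mode 4 RX path used when no dedicated hardware queue is available: after
 * each slave burst, slow-protocol frames are pulled out of the array and
 * handed to the mode 4 state machine, and data frames are dropped when the
 * slave is not COLLECTING or, outside promiscuous mode, when the
 * destination MAC matches neither the bonded port nor a multicast address.
 */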
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* A packet with a known type beyond plain L2 cannot be
			 * a slow frame; skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow
			 * packet, if the slave is not in collecting state, or
			 * if the bonded interface is not in promiscuous mode
			 * and the destination address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++)	{
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
			/* Just update the packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the
	 * stats, unless the table is already full. */
	if (active_clients == MAX_CLIENTS_NUMBER)
		return;
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
		RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	/* This is an RX path, so use the RX queue structure for the cast */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	/* Round-robin start position, shared across all devices and queues */
	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the per-slave mbuf arrays with the packets to be sent */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* Increment the current slave index so the next call to tx burst
	 * starts on the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

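/*
 * Transmit hash helpers for the balance and 802.3ad modes. Each helper
 * XOR-folds a set of header words into a single value; the xmit_l2/l23/l34
 * policies below combine them over the Ethernet addresses only, Ethernet
 * plus IP addresses, or IP addresses plus TCP/UDP ports respectively, and
 * reduce the result modulo the slave count to pick an output slave.
 */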
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

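/*
 * L3+L4 policy: hash the IP addresses and, for non-fragmented TCP/UDP
 * packets, the port pair as well. For example, an IPv4 TCP packet hashes
 * to (src_ip ^ dst_ip) ^ (src_port ^ dst_port), folded by 16 and 8 bits
 * and reduced modulo slave_count, so all packets of one flow stay on the
 * same slave.
 */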
uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in a fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

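/*
 * Estimate how much of a slave's link capacity is still unused: the link
 * speed is converted into a byte budget over the elapsed reorder periods,
 * the observed TX byte count (scaled to the same units) is subtracted, and
 * the result is stored as quotient and remainder of a division by the
 * budget so that bandwidth_cmp() can order slaves without floating point.
 */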
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

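/*
 * bond_tlb_enable() runs the rebalancing callback once, which then re-arms
 * itself every REORDER_PERIOD_MS via rte_eal_alarm_set(); bond_tlb_disable()
 * cancels the pending alarm. The ALB transmit path below steers ARP traffic
 * through the mode 6 client table and falls back to the TLB policy for all
 * other packets.
 */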
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move unsent packets to the end of bufs; the slave
			 * buffer is indexed by its own packet count, not by
			 * nb_pkts */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		/* Move only the unsent TLB packets back to the end of bufs */
		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS]
						[slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the per-slave mbuf arrays with the packets to be sent */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate room for the slow (control) packets that 802.3ad mode may
	 * add to each slave's burst */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total number of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Number of slow packets placed in each slave's burst */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate the per-slave mbuf arrays with the packets to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

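/*
 * Broadcast mode transmits every packet on every active slave. Each mbuf's
 * reference count is raised by (number of slaves - 1) beforehand so that
 * every slave's TX path can legitimately free its reference; on partial
 * failure only the surplus references are freed, keeping the result of the
 * most successful slave intact for the caller.
 */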
1340 static uint16_t
1341 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1342                 uint16_t nb_pkts)
1343 {
1344         struct bond_dev_private *internals;
1345         struct bond_tx_queue *bd_tx_q;
1346
1347         uint8_t tx_failed_flag = 0, num_of_slaves;
1348         uint16_t slaves[RTE_MAX_ETHPORTS];
1349
1350         uint16_t max_nb_of_tx_pkts = 0;
1351
1352         int slave_tx_total[RTE_MAX_ETHPORTS];
1353         int i, most_successful_tx_slave = -1;
1354
1355         bd_tx_q = (struct bond_tx_queue *)queue;
1356         internals = bd_tx_q->dev_private;
1357
1358         /* Copy slave list to protect against slave up/down changes during tx
1359          * bursting */
1360         num_of_slaves = internals->active_slave_count;
1361         memcpy(slaves, internals->active_slaves,
1362                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1363
1364         if (num_of_slaves < 1)
1365                 return 0;
1366
1367         /* Increment reference count on mbufs */
1368         for (i = 0; i < nb_pkts; i++)
1369                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1370
1371         /* Transmit burst on each active slave */
1372         for (i = 0; i < num_of_slaves; i++) {
1373                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1374                                         bufs, nb_pkts);
1375
1376                 if (unlikely(slave_tx_total[i] < nb_pkts))
1377                         tx_failed_flag = 1;
1378
1379                 /* record the count and index of the slave that transmits the
1380                  * maximum number of packets */
1381                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1382                         max_nb_of_tx_pkts = slave_tx_total[i];
1383                         most_successful_tx_slave = i;
1384                 }
1385         }
1386
1387         /* if slaves fail to transmit packets from burst, the calling application
1388          * is not expected to know about multiple references to packets so we must
1389          * handle failures of all packets except those of the most successful slave
1390          */
1391         if (unlikely(tx_failed_flag))
1392                 for (i = 0; i < num_of_slaves; i++)
1393                         if (i != most_successful_tx_slave)
1394                                 while (slave_tx_total[i] < nb_pkts)
1395                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1396
1397         return max_nb_of_tx_pkts;
1398 }
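
/*
 * A note on the refcount handling above: every mbuf must survive one
 * transmit per slave, so its reference count is first raised by
 * (num_of_slaves - 1). Each successful slave transmit consumes one
 * reference; the failure path frees the references held for all slaves
 * except the most successful one, so indexes >= max_nb_of_tx_pkts still
 * hold exactly one reference for the caller. Sketch of the pattern with
 * two slaves and an mbuf m whose refcnt starts at 1:
 *
 *     rte_mbuf_refcnt_update(m, 1);            refcnt 1 -> 2
 *     rte_eth_tx_burst(slave0, qid, &m, 1);    consumes one reference
 *     rte_eth_tx_burst(slave1, qid, &m, 1);    consumes the last one
 */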
1399
1400 void
1401 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1402 {
1403         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1404
1405         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1406                 /**
1407                  * If in mode 4, save the link properties of the first
1408                  * slave; all subsequent slaves must match these properties
1409                  */
1410                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1411
1412                 bond_link->link_autoneg = slave_link->link_autoneg;
1413                 bond_link->link_duplex = slave_link->link_duplex;
1414                 bond_link->link_speed = slave_link->link_speed;
1415         } else {
1416                 /**
1417                  * In all other modes the link properties are set to the
1418                  * default values of AUTONEG/FULL_DUPLEX
1419                  */
1420                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1421                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1422         }
1423 }
1424
1425 int
1426 link_properties_valid(struct rte_eth_dev *ethdev,
1427                 struct rte_eth_link *slave_link)
1428 {
1429         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1430
1431         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1432                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1433
1434                 if (bond_link->link_duplex != slave_link->link_duplex ||
1435                         bond_link->link_autoneg != slave_link->link_autoneg ||
1436                         bond_link->link_speed != slave_link->link_speed)
1437                         return -1;
1438         }
1439
1440         return 0;
1441 }
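
/*
 * Example, illustrative only: in mode 4 a candidate slave whose link does
 * not match the properties recorded from the first slave is rejected.
 * Assuming bonded_ethdev points at a bonded device whose first slave linked
 * at 10G full duplex:
 *
 *     struct rte_eth_link cand = {
 *             .link_speed = ETH_SPEED_NUM_1G,
 *             .link_duplex = ETH_LINK_FULL_DUPLEX,
 *             .link_autoneg = ETH_LINK_AUTONEG,
 *     };
 *     link_properties_valid(bonded_ethdev, &cand);    returns -1, speed differs
 */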
1442
1443 int
1444 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1445 {
1446         struct ether_addr *mac_addr;
1447
1448         if (eth_dev == NULL) {
1449                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1450                 return -1;
1451         }
1452
1453         if (dst_mac_addr == NULL) {
1454                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1455                 return -1;
1456         }
1457
1458         mac_addr = eth_dev->data->mac_addrs;
1459
1460         ether_addr_copy(mac_addr, dst_mac_addr);
1461         return 0;
1462 }
1463
1464 int
1465 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1466 {
1467         struct ether_addr *mac_addr;
1468
1469         if (eth_dev == NULL) {
1470                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1471                 return -1;
1472         }
1473
1474         if (new_mac_addr == NULL) {
1475                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1476                 return -1;
1477         }
1478
1479         mac_addr = eth_dev->data->mac_addrs;
1480
1481         /* If the new MAC differs from the current MAC then update it */
1482         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1483                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1484
1485         return 0;
1486 }
1487
1488 int
1489 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1490 {
1491         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1492         int i;
1493
1494         /* Update slave devices MAC addresses */
1495         if (internals->slave_count < 1)
1496                 return -1;
1497
1498         switch (internals->mode) {
1499         case BONDING_MODE_ROUND_ROBIN:
1500         case BONDING_MODE_BALANCE:
1501         case BONDING_MODE_BROADCAST:
1502                 for (i = 0; i < internals->slave_count; i++) {
1503                         if (rte_eth_dev_default_mac_addr_set(
1504                                         internals->slaves[i].port_id,
1505                                         bonded_eth_dev->data->mac_addrs)) {
1506                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1507                                                 internals->slaves[i].port_id);
1508                                 return -1;
1509                         }
1510                 }
1511                 break;
1512         case BONDING_MODE_8023AD:
1513                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1514                 break;
1515         case BONDING_MODE_ACTIVE_BACKUP:
1516         case BONDING_MODE_TLB:
1517         case BONDING_MODE_ALB:
1518         default:
1519                 for (i = 0; i < internals->slave_count; i++) {
1520                         if (internals->slaves[i].port_id ==
1521                                         internals->current_primary_port) {
1522                                 if (rte_eth_dev_default_mac_addr_set(
1523                                                 internals->current_primary_port,
1524                                                 bonded_eth_dev->data->mac_addrs)) {
1525                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1526                                                         internals->current_primary_port);
1527                                         return -1;
1528                                 }
1529                         } else {
1530                                 if (rte_eth_dev_default_mac_addr_set(
1531                                                 internals->slaves[i].port_id,
1532                                                 &internals->slaves[i].persisted_mac_addr)) {
1533                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1534                                                         internals->slaves[i].port_id);
1535                                         return -1;
1536                                 }
1537                         }
1538                 }
1539         }
1540
1541         return 0;
1542 }
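
/*
 * Summary of the MAC policy applied above: round-robin, balance and
 * broadcast program the bonded MAC onto every slave; mode 4 delegates to
 * the 802.3ad state machine; active-backup, TLB and ALB put the bonded MAC
 * only on the current primary and restore every other slave's persisted
 * (original) MAC, so the network never sees the bonded address on an
 * inactive link.
 */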
1543
1544 int
1545 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1546 {
1547         struct bond_dev_private *internals;
1548
1549         internals = eth_dev->data->dev_private;
1550
1551         switch (mode) {
1552         case BONDING_MODE_ROUND_ROBIN:
1553                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1554                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1555                 break;
1556         case BONDING_MODE_ACTIVE_BACKUP:
1557                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1558                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1559                 break;
1560         case BONDING_MODE_BALANCE:
1561                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1562                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1563                 break;
1564         case BONDING_MODE_BROADCAST:
1565                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1566                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567                 break;
1568         case BONDING_MODE_8023AD:
1569                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1570                         return -1;
1571
1572                 if (internals->mode4.dedicated_queues.enabled == 0) {
1573                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1574                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1575                         RTE_LOG(WARNING, PMD,
1576                                 "Using mode 4; TX and RX bursts must be "
1577                                 "invoked at least every 100ms.\n");
1578                 } else {
1579                         /* Use flow director's optimization */
1580                         eth_dev->rx_pkt_burst =
1581                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1582                         eth_dev->tx_pkt_burst =
1583                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1584                 }
1585                 break;
1586         case BONDING_MODE_TLB:
1587                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1588                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1589                 break;
1590         case BONDING_MODE_ALB:
1591                 if (bond_mode_alb_enable(eth_dev) != 0)
1592                         return -1;
1593
1594                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1595                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1596                 break;
1597         default:
1598                 return -1;
1599         }
1600
1601         internals->mode = mode;
1602
1603         return 0;
1604 }
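
/*
 * Illustrative usage, with bond_port_id an assumption: applications
 * normally select the mode through the public API rather than by calling
 * this helper directly:
 *
 *     if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_BALANCE) != 0)
 *             rte_exit(EXIT_FAILURE, "cannot set bonding mode\n");
 */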
1605
1606
1607 static int
1608 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1609                 struct rte_eth_dev *slave_eth_dev)
1610 {
1611         int errval = 0;
1612         struct bond_dev_private *internals = (struct bond_dev_private *)
1613                 bonded_eth_dev->data->dev_private;
1614         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1615
1616         if (port->slow_pool == NULL) {
1617                 char mem_name[256];
1618                 int slave_id = slave_eth_dev->data->port_id;
1619
1620                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1621                                 slave_id);
1622                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1623                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1624                         slave_eth_dev->data->numa_node);
1625
1626                 /* Any memory allocation failure in initialization is critical because
1627                  * resources cannot be freed, so reinitialization is impossible. */
1628                 if (port->slow_pool == NULL) {
1629                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1630                                 slave_id, mem_name, rte_strerror(rte_errno));
1631                 }
1632         }
1633
1634         if (internals->mode4.dedicated_queues.enabled == 1) {
1635                 /* Configure slow Rx queue */
1636
1637                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1638                                 internals->mode4.dedicated_queues.rx_qid, 128,
1639                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1640                                 NULL, port->slow_pool);
1641                 if (errval != 0) {
1642                         RTE_BOND_LOG(ERR,
1643                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1644                                         slave_eth_dev->data->port_id,
1645                                         internals->mode4.dedicated_queues.rx_qid,
1646                                         errval);
1647                         return errval;
1648                 }
1649
1650                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1651                                 internals->mode4.dedicated_queues.tx_qid, 512,
1652                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1653                                 NULL);
1654                 if (errval != 0) {
1655                         RTE_BOND_LOG(ERR,
1656                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1657                                 slave_eth_dev->data->port_id,
1658                                 internals->mode4.dedicated_queues.tx_qid,
1659                                 errval);
1660                         return errval;
1661                 }
1662         }
1663         return 0;
1664 }
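
/*
 * Note: the dedicated control-path queues set up above use the queue ids
 * chosen in bond_ethdev_start() (one past the application's data queues),
 * so with dedicated queues enabled each slave must support
 * nb_rx_queues + 1 / nb_tx_queues + 1 hardware queues. The descriptor
 * counts (128 RX / 512 TX) are fixed values sized for slow-path traffic
 * only.
 */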
1665
1666 int
1667 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1668                 struct rte_eth_dev *slave_eth_dev)
1669 {
1670         struct bond_rx_queue *bd_rx_q;
1671         struct bond_tx_queue *bd_tx_q;
1672         uint16_t nb_rx_queues;
1673         uint16_t nb_tx_queues;
1674
1675         int errval;
1676         uint16_t q_id;
1677         struct rte_flow_error flow_error;
1678
1679         struct bond_dev_private *internals = (struct bond_dev_private *)
1680                 bonded_eth_dev->data->dev_private;
1681
1682         /* Stop slave */
1683         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1684
1685         /* Enable interrupts on slave device if supported */
1686         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1687                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1688
1689         /* If RSS is enabled for bonding, try to enable it for the slaves */
1690         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1691                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1692                                 != 0) {
1693                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1694                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1695                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1696                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1697                 } else {
1698                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1699                 }
1700
1701                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1702                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1703                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1704                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1705         }
1706
1707         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1708                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1709
1710         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1711         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1712
1713         if (internals->mode == BONDING_MODE_8023AD) {
1714                 if (internals->mode4.dedicated_queues.enabled == 1) {
1715                         nb_rx_queues++;
1716                         nb_tx_queues++;
1717                 }
1718         }
1719
1720         /* Configure device */
1721         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1722                         nb_rx_queues, nb_tx_queues,
1723                         &(slave_eth_dev->data->dev_conf));
1724         if (errval != 0) {
1725                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1726                                 slave_eth_dev->data->port_id, errval);
1727                 return errval;
1728         }
1729
1730         /* Setup Rx Queues */
1731         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1732                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1733
1734                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1735                                 bd_rx_q->nb_rx_desc,
1736                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1737                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1738                 if (errval != 0) {
1739                         RTE_BOND_LOG(ERR,
1740                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1741                                         slave_eth_dev->data->port_id, q_id, errval);
1742                         return errval;
1743                 }
1744         }
1745
1746         /* Setup Tx Queues */
1747         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1748                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1749
1750                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1751                                 bd_tx_q->nb_tx_desc,
1752                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753                                 &bd_tx_q->tx_conf);
1754                 if (errval != 0) {
1755                         RTE_BOND_LOG(ERR,
1756                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1757                                 slave_eth_dev->data->port_id, q_id, errval);
1758                         return errval;
1759                 }
1760         }
1761
1762         if (internals->mode == BONDING_MODE_8023AD &&
1763                         internals->mode4.dedicated_queues.enabled == 1) {
1764                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1765                 if (errval != 0)
1766                         return errval;
1767
1768                 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1769                                 slave_eth_dev->data->port_id);
1770                 if (errval != 0) {
1771                         RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1772                                 slave_eth_dev->data->port_id, errval);
1773                         return -1;
1774                 }
1775
1776                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1777                         rte_flow_destroy(slave_eth_dev->data->port_id,
1778                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1779                                         &flow_error);
1780
1781                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1782                                 slave_eth_dev->data->port_id);
1783         }
1784
1785         /* Start device */
1786         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1787         if (errval != 0) {
1788                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1789                                 slave_eth_dev->data->port_id, errval);
1790                 return -1;
1791         }
1792
1793         /* If RSS is enabled for bonding, synchronize RETA */
1794         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1795                 int i;
1796                 struct bond_dev_private *internals;
1797
1798                 internals = bonded_eth_dev->data->dev_private;
1799
1800                 for (i = 0; i < internals->slave_count; i++) {
1801                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1802                                 errval = rte_eth_dev_rss_reta_update(
1803                                                 slave_eth_dev->data->port_id,
1804                                                 &internals->reta_conf[0],
1805                                                 internals->slaves[i].reta_size);
1806                                 if (errval != 0) {
1807                                         RTE_LOG(WARNING, PMD,
1808                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1809                                                         " RSS Configuration for bonding may be inconsistent.\n",
1810                                                         slave_eth_dev->data->port_id, errval);
1811                                 }
1812                                 break;
1813                         }
1814                 }
1815         }
1816
1817         /* If lsc interrupt is set, check initial slave's link status */
1818         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1819                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1820                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1821                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1822                         NULL);
1823         }
1824
1825         return 0;
1826 }
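
/*
 * slave_configure() mirrors the bonded configuration onto a slave in a
 * fixed order: stop, LSC interrupt flag, RSS key/hash/mq_mode, VLAN filter
 * flag, queue counts (plus the optional mode-4 dedicated pair), per-queue
 * setup, mode-4 flow rules, start, RETA synchronization and finally an
 * initial link-status kick. Any failure aborts the bonded start, since a
 * half-configured slave cannot carry traffic.
 */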
1827
1828 void
1829 slave_remove(struct bond_dev_private *internals,
1830                 struct rte_eth_dev *slave_eth_dev)
1831 {
1832         uint8_t i;
1833
1834         for (i = 0; i < internals->slave_count; i++)
1835                 if (internals->slaves[i].port_id ==
1836                                 slave_eth_dev->data->port_id)
1837                         break;
1838
1839         if (i < (internals->slave_count - 1))
1840                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1841                                 sizeof(internals->slaves[0]) *
1842                                 (internals->slave_count - i - 1));
1843
1844         internals->slave_count--;
1845
1846         /* force reconfiguration of slave interfaces */
1847         _rte_eth_dev_reset(slave_eth_dev);
1848 }
1849
1850 static void
1851 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1852
1853 void
1854 slave_add(struct bond_dev_private *internals,
1855                 struct rte_eth_dev *slave_eth_dev)
1856 {
1857         struct bond_slave_details *slave_details =
1858                         &internals->slaves[internals->slave_count];
1859
1860         slave_details->port_id = slave_eth_dev->data->port_id;
1861         slave_details->last_link_status = 0;
1862
1863         /* Mark slave devices that don't support interrupts so we can
1864          * compensate when we start the bond
1865          */
1866         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1867                 slave_details->link_status_poll_enabled = 1;
1868         }
1869
1870         slave_details->link_status_wait_to_complete = 0;
1871         /* save the slave's current MAC so it can be restored when the slave is removed */
1872         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1873                         sizeof(struct ether_addr));
1874 }
1875
1876 void
1877 bond_ethdev_primary_set(struct bond_dev_private *internals,
1878                 uint16_t slave_port_id)
1879 {
1880         int i;
1881
1882         if (internals->active_slave_count < 1)
1883                 internals->current_primary_port = slave_port_id;
1884         else
1885                 /* Search bonded device slave ports for new proposed primary port */
1886                 for (i = 0; i < internals->active_slave_count; i++) {
1887                         if (internals->active_slaves[i] == slave_port_id)
1888                                 internals->current_primary_port = slave_port_id;
1889                 }
1890 }
1891
1892 static void
1893 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1894
1895 static int
1896 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1897 {
1898         struct bond_dev_private *internals;
1899         int i;
1900
1901         /* slave eth dev will be started by bonded device */
1902         if (check_for_bonded_ethdev(eth_dev)) {
1903                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1904                                 eth_dev->data->port_id);
1905                 return -1;
1906         }
1907
1908         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1909         eth_dev->data->dev_started = 1;
1910
1911         internals = eth_dev->data->dev_private;
1912
1913         if (internals->slave_count == 0) {
1914                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1915                 return -1;
1916         }
1917
1918         if (internals->user_defined_mac == 0) {
1919                 struct ether_addr *new_mac_addr = NULL;
1920
1921                 for (i = 0; i < internals->slave_count; i++)
1922                         if (internals->slaves[i].port_id == internals->primary_port)
1923                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1924
1925                 if (new_mac_addr == NULL)
1926                         return -1;
1927
1928                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1929                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1930                                         eth_dev->data->port_id);
1931                         return -1;
1932                 }
1933         }
1934
1935         /* Update all slave devices' MAC addresses */
1936         if (mac_address_slaves_update(eth_dev) != 0)
1937                 return -1;
1938
1939         /* If bonded device is configured in promiscuous mode then re-apply config */
1940         if (internals->promiscuous_en)
1941                 bond_ethdev_promiscuous_enable(eth_dev);
1942
1943         if (internals->mode == BONDING_MODE_8023AD) {
1944                 if (internals->mode4.dedicated_queues.enabled == 1) {
1945                         internals->mode4.dedicated_queues.rx_qid =
1946                                         eth_dev->data->nb_rx_queues;
1947                         internals->mode4.dedicated_queues.tx_qid =
1948                                         eth_dev->data->nb_tx_queues;
1949                 }
1950         }
1951
1952
1953         /* Reconfigure each slave device if starting bonded device */
1954         for (i = 0; i < internals->slave_count; i++) {
1955                 struct rte_eth_dev *slave_ethdev =
1956                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1957                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1958                         RTE_BOND_LOG(ERR,
1959                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1960                                 eth_dev->data->port_id,
1961                                 internals->slaves[i].port_id);
1962                         return -1;
1963                 }
1964                 /* We will need to poll for link status if any slave doesn't
1965                  * support interrupts
1966                  */
1967                 if (internals->slaves[i].link_status_poll_enabled)
1968                         internals->link_status_polling_enabled = 1;
1969         }
1970         /* start polling if needed */
1971         if (internals->link_status_polling_enabled) {
1972                 rte_eal_alarm_set(
1973                         internals->link_status_polling_interval_ms * 1000,
1974                         bond_ethdev_slave_link_status_change_monitor,
1975                         (void *)&rte_eth_devices[internals->port_id]);
1976         }
1977
1978         if (internals->user_defined_primary_port)
1979                 bond_ethdev_primary_set(internals, internals->primary_port);
1980
1981         if (internals->mode == BONDING_MODE_8023AD)
1982                 bond_mode_8023ad_start(eth_dev);
1983
1984         if (internals->mode == BONDING_MODE_TLB ||
1985                         internals->mode == BONDING_MODE_ALB)
1986                 bond_tlb_enable(internals);
1987
1988         return 0;
1989 }
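
/*
 * A minimal bring-up sketch from the application side; the vdev name and
 * the slave port ids are assumptions:
 *
 *     int bond_id = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *     rte_eth_bond_slave_add(bond_id, slave_port0);
 *     rte_eth_bond_slave_add(bond_id, slave_port1);
 *     ... configure and set up queues on bond_id as for any ethdev ...
 *     rte_eth_dev_start(bond_id);     ends up in bond_ethdev_start()
 */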
1990
1991 static void
1992 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1993 {
1994         uint8_t i;
1995
1996         if (dev->data->rx_queues != NULL) {
1997                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1998                         rte_free(dev->data->rx_queues[i]);
1999                         dev->data->rx_queues[i] = NULL;
2000                 }
2001                 dev->data->nb_rx_queues = 0;
2002         }
2003
2004         if (dev->data->tx_queues != NULL) {
2005                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2006                         rte_free(dev->data->tx_queues[i]);
2007                         dev->data->tx_queues[i] = NULL;
2008                 }
2009                 dev->data->nb_tx_queues = 0;
2010         }
2011 }
2012
2013 void
2014 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2015 {
2016         struct bond_dev_private *internals = eth_dev->data->dev_private;
2017         uint8_t i;
2018
2019         if (internals->mode == BONDING_MODE_8023AD) {
2020                 struct port *port;
2021                 void *pkt = NULL;
2022
2023                 bond_mode_8023ad_stop(eth_dev);
2024
2025                 /* Discard all messages to/from mode 4 state machines */
2026                 for (i = 0; i < internals->active_slave_count; i++) {
2027                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2028
2029                         RTE_ASSERT(port->rx_ring != NULL);
2030                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2031                                 rte_pktmbuf_free(pkt);
2032
2033                         RTE_ASSERT(port->tx_ring != NULL);
2034                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2035                                 rte_pktmbuf_free(pkt);
2036                 }
2037         }
2038
2039         if (internals->mode == BONDING_MODE_TLB ||
2040                         internals->mode == BONDING_MODE_ALB) {
2041                 bond_tlb_disable(internals);
2042                 for (i = 0; i < internals->active_slave_count; i++)
2043                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2044         }
2045
2046         internals->active_slave_count = 0;
2047         internals->link_status_polling_enabled = 0;
2048         for (i = 0; i < internals->slave_count; i++)
2049                 internals->slaves[i].last_link_status = 0;
2050
2051         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2052         eth_dev->data->dev_started = 0;
2053 }
2054
2055 void
2056 bond_ethdev_close(struct rte_eth_dev *dev)
2057 {
2058         struct bond_dev_private *internals = dev->data->dev_private;
2059         uint8_t bond_port_id = internals->port_id;
2060         int skipped = 0;
2061
2062         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2063         while (internals->slave_count != skipped) {
2064                 uint16_t port_id = internals->slaves[skipped].port_id;
2065
2066                 rte_eth_dev_stop(port_id);
2067
2068                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2069                         RTE_LOG(ERR, PMD,
2070                                 "Failed to remove port %d from bonded device "
2071                                 "%s\n", port_id, dev->device->name);
2072                         skipped++;
2073                 }
2074         }
2075         bond_ethdev_free_queues(dev);
2076         rte_bitmap_reset(internals->vlan_filter_bmp);
2077 }
2078
2079 /* forward declaration */
2080 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2081
2082 static void
2083 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2084 {
2085         struct bond_dev_private *internals = dev->data->dev_private;
2086
2087         uint16_t max_nb_rx_queues = UINT16_MAX;
2088         uint16_t max_nb_tx_queues = UINT16_MAX;
2089
2090         dev_info->max_mac_addrs = 1;
2091
2092         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2093                         internals->candidate_max_rx_pktlen :
2094                         ETHER_MAX_JUMBO_FRAME_LEN;
2095
2096         /* The maximum number of tx/rx queues that the bonded device can
2097          * support is the minimum across all of its slaves, as every slave
2098          * must be capable of supporting the same number of tx/rx queues.
2099          */
2100         if (internals->slave_count > 0) {
2101                 struct rte_eth_dev_info slave_info;
2102                 uint8_t idx;
2103
2104                 for (idx = 0; idx < internals->slave_count; idx++) {
2105                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2106                                         &slave_info);
2107
2108                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2109                                 max_nb_rx_queues = slave_info.max_rx_queues;
2110
2111                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2112                                 max_nb_tx_queues = slave_info.max_tx_queues;
2113                 }
2114         }
2115
2116         dev_info->max_rx_queues = max_nb_rx_queues;
2117         dev_info->max_tx_queues = max_nb_tx_queues;
2118
2119         /**
2120          * If dedicated hw queues enabled for link bonding device in LACP mode
2121          * then we need to reduce the maximum number of data path queues by 1.
2122          */
2123         if (internals->mode == BONDING_MODE_8023AD &&
2124                 internals->mode4.dedicated_queues.enabled == 1) {
2125                 dev_info->max_rx_queues--;
2126                 dev_info->max_tx_queues--;
2127         }
2128
2129         dev_info->min_rx_bufsize = 0;
2130
2131         dev_info->rx_offload_capa = internals->rx_offload_capa;
2132         dev_info->tx_offload_capa = internals->tx_offload_capa;
2133         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2134
2135         dev_info->reta_size = internals->reta_size;
2136 }
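
/*
 * Example of the queue-limit computation above: with two slaves reporting
 * 16 and 8 queues, the bonded device advertises 8; with mode-4 dedicated
 * queues enabled that drops to 7 usable data queues, since one rx/tx pair
 * per slave is reserved for LACP control traffic.
 */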
2137
2138 static int
2139 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2140 {
2141         int res;
2142         uint16_t i;
2143         struct bond_dev_private *internals = dev->data->dev_private;
2144
2145         /* don't do this while a slave is being added */
2146         rte_spinlock_lock(&internals->lock);
2147
2148         if (on)
2149                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2150         else
2151                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2152
2153         for (i = 0; i < internals->slave_count; i++) {
2154                 uint16_t port_id = internals->slaves[i].port_id;
2155
2156                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2157                 if (res == -ENOTSUP)
2158                         RTE_LOG(WARNING, PMD,
2159                                 "Setting VLAN filter on slave port %u not supported.\n",
2160                                 port_id);
2161         }
2162
2163         rte_spinlock_unlock(&internals->lock);
2164         return 0;
2165 }
2166
2167 static int
2168 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2169                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2170                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2171 {
2172         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2173                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2174                                         0, dev->data->numa_node);
2175         if (bd_rx_q == NULL)
2176                 return -1;
2177
2178         bd_rx_q->queue_id = rx_queue_id;
2179         bd_rx_q->dev_private = dev->data->dev_private;
2180
2181         bd_rx_q->nb_rx_desc = nb_rx_desc;
2182
2183         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2184         bd_rx_q->mb_pool = mb_pool;
2185
2186         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2187
2188         return 0;
2189 }
2190
2191 static int
2192 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2193                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2194                 const struct rte_eth_txconf *tx_conf)
2195 {
2196         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2197                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2198                                         0, dev->data->numa_node);
2199
2200         if (bd_tx_q == NULL)
2201                 return -1;
2202
2203         bd_tx_q->queue_id = tx_queue_id;
2204         bd_tx_q->dev_private = dev->data->dev_private;
2205
2206         bd_tx_q->nb_tx_desc = nb_tx_desc;
2207         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2208
2209         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2210
2211         return 0;
2212 }
2213
2214 static void
2215 bond_ethdev_rx_queue_release(void *queue)
2216 {
2217         if (queue == NULL)
2218                 return;
2219
2220         rte_free(queue);
2221 }
2222
2223 static void
2224 bond_ethdev_tx_queue_release(void *queue)
2225 {
2226         if (queue == NULL)
2227                 return;
2228
2229         rte_free(queue);
2230 }
2231
2232 static void
2233 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2234 {
2235         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2236         struct bond_dev_private *internals;
2237
2238         /* Default value for polling slave found is true as we don't want to
2239          * disable the polling thread if we cannot get the lock */
2240         int i, polling_slave_found = 1;
2241
2242         if (cb_arg == NULL)
2243                 return;
2244
2245         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2246         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2247
2248         if (!bonded_ethdev->data->dev_started ||
2249                 !internals->link_status_polling_enabled)
2250                 return;
2251
2252         /* If the device is currently being configured, don't check the slaves'
2253          * link status; wait until the next period */
2254         if (rte_spinlock_trylock(&internals->lock)) {
2255                 if (internals->slave_count > 0)
2256                         polling_slave_found = 0;
2257
2258                 for (i = 0; i < internals->slave_count; i++) {
2259                         if (!internals->slaves[i].link_status_poll_enabled)
2260                                 continue;
2261
2262                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2263                         polling_slave_found = 1;
2264
2265                         /* Update slave link status */
2266                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2267                                         internals->slaves[i].link_status_wait_to_complete);
2268
2269                         /* if link status has changed since last checked then call lsc
2270                          * event callback */
2271                         if (slave_ethdev->data->dev_link.link_status !=
2272                                         internals->slaves[i].last_link_status) {
2273                                 internals->slaves[i].last_link_status =
2274                                                 slave_ethdev->data->dev_link.link_status;
2275
2276                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2277                                                 RTE_ETH_EVENT_INTR_LSC,
2278                                                 &bonded_ethdev->data->port_id,
2279                                                 NULL);
2280                         }
2281                 }
2282                 rte_spinlock_unlock(&internals->lock);
2283         }
2284
2285         if (polling_slave_found)
2286                 /* Set alarm to continue monitoring link status of slave ethdevs */
2287                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2288                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2289 }
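
/*
 * The alarm interval is passed to rte_eal_alarm_set() in microseconds,
 * hence the "* 1000" applied to the millisecond polling interval. Each
 * expiry re-arms itself only while at least one slave still requires
 * polling, so the alarm chain dies out once every slave supports LSC
 * interrupts or the device is stopped.
 */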
2290
2291 static int
2292 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2293 {
2294         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2295
2296         struct bond_dev_private *bond_ctx;
2297         struct rte_eth_link slave_link;
2298
2299         uint32_t idx;
2300
2301         bond_ctx = ethdev->data->dev_private;
2302
2303         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2304
2305         if (ethdev->data->dev_started == 0 ||
2306                         bond_ctx->active_slave_count == 0) {
2307                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2308                 return 0;
2309         }
2310
2311         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2312
2313         if (wait_to_complete)
2314                 link_update = rte_eth_link_get;
2315         else
2316                 link_update = rte_eth_link_get_nowait;
2317
2318         switch (bond_ctx->mode) {
2319         case BONDING_MODE_BROADCAST:
2320                 /**
2321                  * Setting link speed to UINT32_MAX to ensure we pick up the
2322                  * value of the first active slave
2323                  */
2324                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2325
2326                 /**
2327                  * link speed is the minimum of all the slaves' link speeds,
2328                  * as packet loss will occur on the slowest slave if
2329                  * transmission at a higher rate is attempted
2330                  */
2331                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2332                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2333
2334                         if (slave_link.link_speed <
2335                                         ethdev->data->dev_link.link_speed)
2336                                 ethdev->data->dev_link.link_speed =
2337                                                 slave_link.link_speed;
2338                 }
2339                 break;
2340         case BONDING_MODE_ACTIVE_BACKUP:
2341                 /* Current primary slave */
2342                 link_update(bond_ctx->current_primary_port, &slave_link);
2343
2344                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2345                 break;
2346         case BONDING_MODE_8023AD:
2347                 ethdev->data->dev_link.link_autoneg =
2348                                 bond_ctx->mode4.slave_link.link_autoneg;
2349                 ethdev->data->dev_link.link_duplex =
2350                                 bond_ctx->mode4.slave_link.link_duplex;
2351                 /* fall through to update link speed */
2352         case BONDING_MODE_ROUND_ROBIN:
2353         case BONDING_MODE_BALANCE:
2354         case BONDING_MODE_TLB:
2355         case BONDING_MODE_ALB:
2356         default:
2357                 /**
2358                  * In these modes the maximum theoretical link speed is the
2359                  * sum of all the slaves' speeds
2360                  */
2361                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2362
2363                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2364                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2365
2366                         ethdev->data->dev_link.link_speed +=
2367                                         slave_link.link_speed;
2368                 }
2369         }
2370
2371
2372         return 0;
2373 }
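
/*
 * Example of the aggregation above, with illustrative numbers: given
 * active slaves at 10G and 1G, broadcast mode reports 1G (the minimum, as
 * every packet crosses every slave), active-backup reports the primary's
 * speed, and the remaining modes report 11G, the theoretical aggregate.
 */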
2374
2375
2376 static int
2377 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2378 {
2379         struct bond_dev_private *internals = dev->data->dev_private;
2380         struct rte_eth_stats slave_stats;
2381         int i, j;
2382
2383         for (i = 0; i < internals->slave_count; i++) {
2384                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2385
2386                 stats->ipackets += slave_stats.ipackets;
2387                 stats->opackets += slave_stats.opackets;
2388                 stats->ibytes += slave_stats.ibytes;
2389                 stats->obytes += slave_stats.obytes;
2390                 stats->imissed += slave_stats.imissed;
2391                 stats->ierrors += slave_stats.ierrors;
2392                 stats->oerrors += slave_stats.oerrors;
2393                 stats->rx_nombuf += slave_stats.rx_nombuf;
2394
2395                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2396                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2397                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2398                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2399                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2400                         stats->q_errors[j] += slave_stats.q_errors[j];
2401                 }
2402
2403         }
2404
2405         return 0;
2406 }
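
/*
 * Note: the bonded device keeps no counters of its own; this handler sums
 * each slave's counters on every call, including the first
 * RTE_ETHDEV_QUEUE_STAT_CNTRS per-queue entries. Correspondingly,
 * stats_reset on the bonded port resets the slaves themselves (see below).
 */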
2407
2408 static void
2409 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2410 {
2411         struct bond_dev_private *internals = dev->data->dev_private;
2412         int i;
2413
2414         for (i = 0; i < internals->slave_count; i++)
2415                 rte_eth_stats_reset(internals->slaves[i].port_id);
2416 }
2417
2418 static void
2419 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2420 {
2421         struct bond_dev_private *internals = eth_dev->data->dev_private;
2422         int i;
2423
2424         internals->promiscuous_en = 1;
2425
2426         switch (internals->mode) {
2427         /* Promiscuous mode is propagated to all slaves */
2428         case BONDING_MODE_ROUND_ROBIN:
2429         case BONDING_MODE_BALANCE:
2430         case BONDING_MODE_BROADCAST:
2431                 for (i = 0; i < internals->slave_count; i++)
2432                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2433                 break;
2434         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2435         case BONDING_MODE_8023AD:
2436                 break;
2437         /* Promiscuous mode is propagated only to primary slave */
2438         case BONDING_MODE_ACTIVE_BACKUP:
2439         case BONDING_MODE_TLB:
2440         case BONDING_MODE_ALB:
2441         default:
2442                 rte_eth_promiscuous_enable(internals->current_primary_port);
2443         }
2444 }
2445
2446 static void
2447 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2448 {
2449         struct bond_dev_private *internals = dev->data->dev_private;
2450         int i;
2451
2452         internals->promiscuous_en = 0;
2453
2454         switch (internals->mode) {
2455         /* Promiscuous mode is propagated to all slaves */
2456         case BONDING_MODE_ROUND_ROBIN:
2457         case BONDING_MODE_BALANCE:
2458         case BONDING_MODE_BROADCAST:
2459                 for (i = 0; i < internals->slave_count; i++)
2460                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2461                 break;
2462         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2463         case BONDING_MODE_8023AD:
2464                 break;
2465         /* Promiscuous mode is propagated only to primary slave */
2466         case BONDING_MODE_ACTIVE_BACKUP:
2467         case BONDING_MODE_TLB:
2468         case BONDING_MODE_ALB:
2469         default:
2470                 rte_eth_promiscuous_disable(internals->current_primary_port);
2471         }
2472 }
2473
2474 static void
2475 bond_ethdev_delayed_lsc_propagation(void *arg)
2476 {
2477         if (arg == NULL)
2478                 return;
2479
2480         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2481                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2482 }
2483
2484 int
2485 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2486                 void *param, void *ret_param __rte_unused)
2487 {
2488         struct rte_eth_dev *bonded_eth_dev;
2489         struct bond_dev_private *internals;
2490         struct rte_eth_link link;
2491         int rc = -1;
2492
2493         int i, valid_slave = 0;
2494         uint8_t active_pos;
2495         uint8_t lsc_flag = 0;
2496
2497         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2498                 return rc;
2499
2500         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2501
2502         if (check_for_bonded_ethdev(bonded_eth_dev))
2503                 return rc;
2504
2505         internals = bonded_eth_dev->data->dev_private;
2506
2507         /* If the device isn't started don't handle interrupts */
2508         if (!bonded_eth_dev->data->dev_started)
2509                 return rc;
2510
2511         /* verify that port_id is a valid slave of bonded port */
2512         for (i = 0; i < internals->slave_count; i++) {
2513                 if (internals->slaves[i].port_id == port_id) {
2514                         valid_slave = 1;
2515                         break;
2516                 }
2517         }
2518
2519         if (!valid_slave)
2520                 return rc;
2521
2522         /* Search for port in active port list */
2523         active_pos = find_slave_by_id(internals->active_slaves,
2524                         internals->active_slave_count, port_id);
2525
2526         rte_eth_link_get_nowait(port_id, &link);
2527         if (link.link_status) {
2528                 if (active_pos < internals->active_slave_count)
2529                         return rc;
2530
2531                 /* if no active slave ports then set this port to be primary port */
2532                 if (internals->active_slave_count < 1) {
2533                         /* If first active slave, then change link status */
2534                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2535                         internals->current_primary_port = port_id;
2536                         lsc_flag = 1;
2537
2538                         mac_address_slaves_update(bonded_eth_dev);
2539                 }
2540
2541                 activate_slave(bonded_eth_dev, port_id);
2542
2543                 /* If user has defined the primary port then default to using it */
2544                 if (internals->user_defined_primary_port &&
2545                                 internals->primary_port == port_id)
2546                         bond_ethdev_primary_set(internals, port_id);
2547         } else {
2548                 if (active_pos == internals->active_slave_count)
2549                         return rc;
2550
2551                 /* Remove from active slave list */
2552                 deactivate_slave(bonded_eth_dev, port_id);
2553
2554                 if (internals->active_slave_count < 1)
2555                         lsc_flag = 1;
2556
2557                 /* Update primary id: take the first active slave from the list,
2558                  * or fall back to the configured primary port if none are available */
2559                 if (port_id == internals->current_primary_port) {
2560                         if (internals->active_slave_count > 0)
2561                                 bond_ethdev_primary_set(internals,
2562                                                 internals->active_slaves[0]);
2563                         else
2564                                 internals->current_primary_port = internals->primary_port;
2565                 }
2566         }
2567
2568         /**
2569          * Update bonded device link properties after any change to active
2570          * slaves
2571          */
2572         bond_ethdev_link_update(bonded_eth_dev, 0);
2573
2574         if (lsc_flag) {
2575                 /* Cancel any possible outstanding interrupts if delays are enabled */
2576                 if (internals->link_up_delay_ms > 0 ||
2577                         internals->link_down_delay_ms > 0)
2578                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2579                                         bonded_eth_dev);
2580
2581                 if (bonded_eth_dev->data->dev_link.link_status) {
2582                         if (internals->link_up_delay_ms > 0)
2583                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2584                                                 bond_ethdev_delayed_lsc_propagation,
2585                                                 (void *)bonded_eth_dev);
2586                         else
2587                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2588                                                 RTE_ETH_EVENT_INTR_LSC,
2589                                                 NULL, NULL);
2590
2591                 } else {
2592                         if (internals->link_down_delay_ms > 0)
2593                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2594                                                 bond_ethdev_delayed_lsc_propagation,
2595                                                 (void *)bonded_eth_dev);
2596                         else
2597                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2598                                                 RTE_ETH_EVENT_INTR_LSC,
2599                                                 NULL, NULL);
2600                 }
2601         }
2602         return 0;
2603 }
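
/*
 * The link_up/link_down delay handling above acts as a debounce: any
 * pending delayed propagation is cancelled first, then the new state is
 * either propagated immediately or re-scheduled through
 * rte_eal_alarm_set() (again in microseconds), so a rapidly flapping
 * slave generates at most one application LSC event per settle period.
 */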
2604
2605 static int
2606 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2607                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2608 {
2609         unsigned i, j;
2610         int result = 0;
2611         int slave_reta_size;
2612         unsigned reta_count;
2613         struct bond_dev_private *internals = dev->data->dev_private;
2614
2615         if (reta_size != internals->reta_size)
2616                 return -EINVAL;
2617
2618         /* Copy RETA table */
2619         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2620
2621         for (i = 0; i < reta_count; i++) {
2622                 internals->reta_conf[i].mask = reta_conf[i].mask;
2623                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2624                         if ((reta_conf[i].mask >> j) & 0x01)
2625                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2626         }
2627
2628         /* Fill rest of array */
2629         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2630                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2631                                 sizeof(internals->reta_conf[0]) * reta_count);
2632
2633         /* Propagate RETA over slaves */
2634         for (i = 0; i < internals->slave_count; i++) {
2635                 slave_reta_size = internals->slaves[i].reta_size;
2636                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2637                                 &internals->reta_conf[0], slave_reta_size);
2638                 if (result < 0)
2639                         return result;
2640         }
2641
2642         return 0;
2643 }
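
/*
 * Example, illustrative: with internals->reta_size == 128 the caller
 * supplies 128 / RTE_RETA_GROUP_SIZE (64) == 2 groups; the fill loop above
 * then replicates those 2 groups across the rest of internals->reta_conf,
 * so a slave with a larger RETA (e.g. 512 entries) receives a repeating
 * pattern consistent with the bonded configuration.
 */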

static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
	int i, j;
	struct bond_dev_private *internals = dev->data->dev_private;

	if (reta_size != internals->reta_size)
		return -EINVAL;

	/* Copy RETA table */
	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

	return 0;
}

static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	int i, result = 0;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_eth_rss_conf bond_rss_conf;

	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

	if (bond_rss_conf.rss_hf != 0)
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
			sizeof(internals->rss_key)) {
		/* A length of zero selects the default 40-byte key length */
		if (bond_rss_conf.rss_key_len == 0)
			bond_rss_conf.rss_key_len = 40;
		internals->rss_key_len = bond_rss_conf.rss_key_len;
		memcpy(internals->rss_key, bond_rss_conf.rss_key,
				internals->rss_key_len);
	}

	/* Propagate the new hash configuration to every slave */
	for (i = 0; i < internals->slave_count; i++) {
		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
				&bond_rss_conf);
		if (result < 0)
			return result;
	}

	return 0;
}
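
/*
 * Illustrative sketch (not part of the driver): changing the hash
 * functions on the bonded port; bond_ethdev_rss_hash_update() above masks
 * the request against the common slave capabilities and fans it out
 * ("bond_port_id" is a placeholder):
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,
 *		.rss_key_len = 0,
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(bond_port_id, &conf);
 *
 * Passing a NULL rss_key keeps the currently programmed key, as the key
 * is only copied above when a non-NULL pointer is supplied.
 */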

static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
		struct rte_eth_rss_conf *rss_conf)
{
	struct bond_dev_private *internals = dev->data->dev_private;

	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	rss_conf->rss_key_len = internals->rss_key_len;
	if (rss_conf->rss_key)
		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

	return 0;
}

const struct eth_dev_ops default_dev_ops = {
	.dev_start            = bond_ethdev_start,
	.dev_stop             = bond_ethdev_stop,
	.dev_close            = bond_ethdev_close,
	.dev_configure        = bond_ethdev_configure,
	.dev_infos_get        = bond_ethdev_info,
	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
	.rx_queue_release     = bond_ethdev_rx_queue_release,
	.tx_queue_release     = bond_ethdev_tx_queue_release,
	.link_update          = bond_ethdev_link_update,
	.stats_get            = bond_ethdev_stats_get,
	.stats_reset          = bond_ethdev_stats_reset,
	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
	.reta_update          = bond_ethdev_rss_reta_update,
	.reta_query           = bond_ethdev_rss_reta_query,
	.rss_hash_update      = bond_ethdev_rss_hash_update,
	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
};

static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
	const char *name = rte_vdev_device_name(dev);
	uint8_t socket_id = dev->device.numa_node;
	struct bond_dev_private *internals = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	uint32_t vlan_filter_bmp_size;

	/* now do all data allocation - for the eth_dev structure and the
	 * internal (private) data
	 */

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
		goto err;
	}

	internals = eth_dev->data->dev_private;
	eth_dev->data->nb_rx_queues = (uint16_t)1;
	eth_dev->data->nb_tx_queues = (uint16_t)1;

	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
			socket_id);
	if (eth_dev->data->mac_addrs == NULL) {
		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
		goto err;
	}

	eth_dev->dev_ops = &default_dev_ops;
	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

	rte_spinlock_init(&internals->lock);

	internals->port_id = eth_dev->data->port_id;
	internals->mode = BONDING_MODE_INVALID;
	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
	internals->xmit_hash = xmit_l2_hash;
	internals->user_defined_mac = 0;

	internals->link_status_polling_enabled = 0;

	internals->link_status_polling_interval_ms =
		DEFAULT_POLLING_INTERVAL_10_MS;
	internals->link_down_delay_ms = 0;
	internals->link_up_delay_ms = 0;

	internals->slave_count = 0;
	internals->active_slave_count = 0;
	internals->rx_offload_capa = 0;
	internals->tx_offload_capa = 0;
	internals->candidate_max_rx_pktlen = 0;
	internals->max_rx_pktlen = 0;

	/* Initially allow any RSS offload type to be chosen */
	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
	memset(internals->slaves, 0, sizeof(internals->slaves));

	/* Set mode 4 default configuration */
	bond_mode_8023ad_setup(eth_dev, NULL);
	if (bond_ethdev_mode_set(eth_dev, mode)) {
		RTE_BOND_LOG(ERR, "Failed to set bonded device %d to mode %d",
				 eth_dev->data->port_id, mode);
		goto err;
	}

	vlan_filter_bmp_size =
		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
						   RTE_CACHE_LINE_SIZE);
	if (internals->vlan_filter_bmpmem == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to allocate vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		goto err;
	}

	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
	if (internals->vlan_filter_bmp == NULL) {
		RTE_BOND_LOG(ERR,
			     "Failed to init vlan bitmap for bonded device %u",
			     eth_dev->data->port_id);
		rte_free(internals->vlan_filter_bmpmem);
		goto err;
	}

	return eth_dev->data->port_id;

err:
	rte_free(internals);
	if (eth_dev != NULL) {
		rte_free(eth_dev->data->mac_addrs);
		rte_eth_dev_release_port(eth_dev);
	}
	return -1;
}

static int
bond_probe(struct rte_vdev_device *dev)
{
	const char *name;
	struct bond_dev_private *internals;
	struct rte_kvargs *kvlist;
	uint8_t bonding_mode, socket_id;
	int arg_count, port_id;
	uint8_t agg_mode;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
		pmd_bond_init_valid_arguments);
	if (kvlist == NULL)
		return -1;

	/* Parse link bonding mode */
	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
				&bond_ethdev_parse_slave_mode_kvarg,
				&bonding_mode) != 0) {
			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
					name);
			goto parse_error;
		}
	} else {
		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
				"device %s\n", name);
		goto parse_error;
	}

	/* Parse socket id to create bonding device on */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
	if (arg_count == 1) {
		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
				!= 0) {
			RTE_LOG(ERR, EAL, "Invalid socket id specified for "
					"bonded device %s\n", name);
			goto parse_error;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
				"bonded device %s\n", name);
		goto parse_error;
	} else {
		socket_id = rte_socket_id();
	}

	dev->device.numa_node = socket_id;

	/* Create link bonding eth device */
	port_id = bond_alloc(dev, bonding_mode);
	if (port_id < 0) {
		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
				"socket %u.\n", name, bonding_mode, socket_id);
		goto parse_error;
	}
	internals = rte_eth_devices[port_id].data->dev_private;
	internals->kvlist = kvlist;

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			goto parse_error;
		}

		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	} else {
		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
	}

	RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
			"socket %u.\n", name, port_id, bonding_mode, socket_id);
	return 0;

parse_error:
	rte_kvargs_free(kvlist);

	return -1;
}
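
/*
 * Illustrative example (interface names are hypothetical): the kvargs
 * consumed directly by bond_probe() above could come from an EAL command
 * line such as
 *
 *	--vdev 'net_bonding0,mode=1,socket_id=0,slave=0000:02:00.0'
 *
 * Only "mode", "socket_id" and "agg_mode" are processed here; the rest of
 * the kvlist (slaves, primary, xmit policy, delays, ...) is stored in the
 * private data and applied by bond_ethdev_configure() once all other
 * pdevs and vdevs have been allocated and slave names can be resolved.
 */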

static int
bond_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev;
	struct bond_dev_private *internals;
	const char *name;

	if (!dev)
		return -EINVAL;

	name = rte_vdev_device_name(dev);
	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

	/* now free all the data allocated in bond_alloc() - the eth_dev
	 * structure and the internal (private) data
	 */

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return -ENODEV;

	RTE_ASSERT(eth_dev->device == &dev->device);

	internals = eth_dev->data->dev_private;
	if (internals->slave_count != 0)
		return -EBUSY;

	if (eth_dev->data->dev_started == 1) {
		bond_ethdev_stop(eth_dev);
		bond_ethdev_close(eth_dev);
	}

	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

	rte_bitmap_free(internals->vlan_filter_bmp);
	rte_free(internals->vlan_filter_bmpmem);
	rte_free(eth_dev->data->dev_private);
	rte_free(eth_dev->data->mac_addrs);

	rte_eth_dev_release_port(eth_dev);

	return 0;
}
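
/*
 * Illustrative sketch (not part of the driver): tearing a bonded device
 * down from the API side. All slaves must be detached first, otherwise
 * bond_remove() above returns -EBUSY ("bond_port_id" and "slave_port"
 * are placeholders):
 *
 *	rte_eth_bond_slave_remove(bond_port_id, slave_port);
 *	rte_eth_bond_free("net_bonding0");
 */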

/* This function resolves the slave port ids after all the other pdevs and
 * vdevs have been allocated. */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
	const char *name = dev->device->name;
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_kvargs *kvlist = internals->kvlist;
	int arg_count;
	uint16_t port_id = dev - rte_eth_devices;
	uint8_t agg_mode;

	static const uint8_t default_rss_key[40] = {
		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
		0xBE, 0xAC, 0x01, 0xFA
	};

	unsigned i, j;

	/* If RSS is enabled, fill table and key with default values */
	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
		memcpy(internals->rss_key, default_rss_key,
				sizeof(default_rss_key));

		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
			internals->reta_conf[i].mask = ~0LL;
			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
		}
	}

	/* set the max_rx_pktlen */
	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

	/*
	 * if no kvlist, it means that this bonded device has been created
	 * through the bonding api.
	 */
	if (!kvlist)
		return 0;

	/* Parse MAC address for bonded device */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
	if (arg_count == 1) {
		struct ether_addr bond_mac;

		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
					name);
			return -1;
		}

		/* Set MAC address */
		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set mac address on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"MAC address can be specified only once for bonded device %s\n",
				name);
		return -1;
	}

	/* Parse/set balance mode transmit policy */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
	if (arg_count == 1) {
		uint8_t xmit_policy;

		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
						0) {
			RTE_LOG(INFO, EAL,
					"Invalid xmit policy specified for bonded device %s\n",
					name);
			return -1;
		}

		/* Set balance mode transmit policy */
		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set balance xmit policy on bonded device %s\n",
					name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(ERR, EAL,
				"Transmit policy can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
		if (rte_kvargs_process(kvlist,
				PMD_BOND_AGG_MODE_KVARG,
				&bond_ethdev_parse_slave_agg_mode_kvarg,
				&agg_mode) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse agg selection mode for bonded device %s\n",
					name);
			return -1;
		}
		if (internals->mode == BONDING_MODE_8023AD)
			rte_eth_bond_8023ad_agg_selection_set(port_id,
					agg_mode);
	}

	/* Parse/add slave ports to bonded device */
	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
		struct bond_ethdev_slave_ports slave_ports;
		unsigned i;

		memset(&slave_ports, 0, sizeof(slave_ports));

		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
			RTE_LOG(ERR, EAL,
					"Failed to parse slave ports for bonded device %s\n",
					name);
			return -1;
		}

		for (i = 0; i < slave_ports.slave_count; i++) {
			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
				RTE_LOG(ERR, EAL,
						"Failed to add port %d as slave to bonded device %s\n",
						slave_ports.slaves[i], name);
			}
		}
	} else {
		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
		return -1;
	}

	/* Parse/set primary slave port id */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
	if (arg_count == 1) {
		uint16_t primary_slave_port_id;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_PRIMARY_SLAVE_KVARG,
				&bond_ethdev_parse_primary_slave_port_id_kvarg,
				&primary_slave_port_id) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid primary slave port id specified for bonded device"
					" %s\n", name);
			return -1;
		}

		/* Set primary slave port id */
		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set primary slave port %d on bonded device %s\n",
					primary_slave_port_id, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Primary slave can be specified only once for bonded device"
				" %s\n", name);
		return -1;
	}

	/* Parse link status monitor polling interval */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
	if (arg_count == 1) {
		uint32_t lsc_poll_interval_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LSC_POLL_PERIOD_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&lsc_poll_interval_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid lsc polling interval value specified for bonded"
					" device %s\n", name);
			return -1;
		}

		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set lsc monitor polling interval (%u ms) on"
					" bonded device %s\n", lsc_poll_interval_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"LSC polling interval can be specified only once for bonded"
				" device %s\n", name);
		return -1;
	}

	/* Parse link up interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_up_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_up_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link up propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link up propagation delay */
		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link up propagation delay (%u ms) on bonded"
					" device %s\n", link_up_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link up propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	/* Parse link down interrupt propagation delay */
	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
	if (arg_count == 1) {
		uint32_t link_down_delay_ms;

		if (rte_kvargs_process(kvlist,
				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
				&bond_ethdev_parse_time_ms_kvarg,
				&link_down_delay_ms) < 0) {
			RTE_LOG(INFO, EAL,
					"Invalid link down propagation delay value specified for"
					" bonded device %s\n", name);
			return -1;
		}

		/* Set link down propagation delay */
		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
				!= 0) {
			RTE_LOG(ERR, EAL,
					"Failed to set link down propagation delay (%u ms) on"
					" bonded device %s\n", link_down_delay_ms, name);
			return -1;
		}
	} else if (arg_count > 1) {
		RTE_LOG(INFO, EAL,
				"Link down propagation delay can be specified only once for"
				" bonded device %s\n", name);
		return -1;
	}

	return 0;
}
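
/*
 * Illustrative sketch (not part of the driver): the equivalent
 * configuration driven through the bonding API instead of kvargs
 * ("slave_port0"/"slave_port1" are placeholders):
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave_port0);
 *	rte_eth_bond_slave_add(bond_port, slave_port1);
 *	rte_eth_bond_primary_set(bond_port, slave_port0);
 *	rte_eth_bond_link_monitoring_set(bond_port, 100);
 *
 * In that case internals->kvlist stays NULL and bond_ethdev_configure()
 * returns early, after the RSS defaults have been applied.
 */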

struct rte_vdev_driver pmd_bond_drv = {
	.probe = bond_probe,
	.remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
	"slave=<ifc> "
	"primary=<ifc> "
	"mode=[0-6] "
	"xmit_policy=[l2 | l23 | l34] "
	"agg_mode=[count | stable | bandwidth] "
	"socket_id=<int> "
	"mac=<mac addr> "
	"lsc_poll_period_ms=<int> "
	"up_delay=<int> "
	"down_delay=<int>");
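
/*
 * Illustrative example (device addresses are hypothetical): a full
 * parameter string matching the registration above, as it could appear
 * on a DPDK application command line:
 *
 *	--vdev 'net_bonding0,mode=4,agg_mode=bandwidth,xmit_policy=l34,slave=0000:02:00.0,slave=0000:02:00.1,lsc_poll_period_ms=100,up_delay=500,down_delay=500'
 */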