deb_dpdk.git: drivers/net/bonding/rte_eth_bond_pmd.c (upstream version 17.11-rc3)
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

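/*
 * XOR of the source and destination L4 ports. The result is symmetric,
 * so both directions of a flow hash to the same slave. It works for TCP
 * and UDP alike because the port fields lead both header structures.
 */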
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

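/*
 * Skip past up to two stacked VLAN headers and return the byte offset of
 * the encapsulated payload. *proto is updated to the inner EtherType so
 * callers can dispatch on the real payload protocol.
 */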
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

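/*
 * Default RX path (modes without special RX handling): poll each active
 * slave in array order, filling bufs until nb_pkts packets have been
 * gathered or every slave has been polled once.
 */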
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;

        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset into *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
                                bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
                if (num_rx_slave) {
                        num_rx_total += num_rx_slave;
                        nb_pkts -= num_rx_slave;
                }
        }

        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

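/*
 * A frame is treated as LACP/marker traffic only if it is untagged and
 * carries the slow-protocols EtherType with a LACP or marker subtype;
 * VLAN-tagged frames are never slow-protocol frames.
 */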
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

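/*
 * Check that a slave can steer slow-protocol frames to a dedicated queue:
 * validate the EtherType flow rule on the slave, and make sure the slave
 * supports at least as many RX/TX queues as the bonded device has
 * configured (one of which is reserved for LACP traffic).
 */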
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow director */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t i, idx;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0, idx = internals->active_slave;
                        i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
                idx = idx % slave_count;

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);
        }

        internals->active_slave = idx;

        return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, op_slave_idx;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

        /* Total number of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        if (unlikely(nb_pkts == 0))
                return 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
                        num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];
                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate per-slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i],
                                        distributing_count);

                        /* Use only slaves that are currently distributing. */
                        uint8_t slave_offset =
                                        distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
                                        bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                num_tx_total += num_tx_slave;
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
                                        num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

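/*
 * Mode 4 RX without dedicated queues: receive from each slave in turn,
 * then filter the burst in place. LACP/marker frames are handed to the
 * mode 4 state machine, and data frames are dropped when the slave is
 * not collecting or, outside promiscuous mode, when the destination MAC
 * is neither the bond's address nor multicast.
 */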
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* Packets with a known upper-layer type cannot be
                         * slow-protocol frames; skip them. */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow
                         * packet, if the slave is not in collecting state, or
                         * if the bonded interface is not in promiscuous mode
                         * and the destination address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        internals->active_slave = idx;
        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

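/*
 * Per-client RX/TX counters for the mode 6 debug output. The direction is
 * inferred from which burst counter the caller passes: the address of
 * burstnumberRX means an RX update, anything else counts as TX.
 */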
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Just update the RX or TX packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update the stats. */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX path, so cast to the RX queue structure (only the
         * dev_private member is used here). */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Distribute the packets round-robin across the per-slave buffers */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

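/*
 * Hash helpers for the balance and 802.3AD transmit policies. Each one
 * folds the relevant header fields together with XOR; the xmit_l2/l23/l34
 * wrappers below reduce the result modulo the slave count to pick an
 * output slave, so both directions of a flow map to the same slave.
 */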
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in a fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

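/*
 * Roughly estimate how much of a slave's link capacity is left unused
 * over the current sampling window: the link speed (Mbit/s) is converted
 * to bytes and scaled by the window length, then the observed TX byte
 * count is subtracted. The result is kept as an integer quotient plus
 * remainder so bandwidth_cmp() can order slaves without floating point.
 */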
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

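/*
 * TLB transmit: walk the slaves in the bandwidth order maintained by the
 * alarm callback above, rewriting the source MAC of packets that still
 * carry the primary slave's address so that each slave transmits with
 * its own address, and push any packets left over from a partial burst
 * to the next slave in the order.
 */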
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

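/*
 * ALB transmit: ARP packets are rewritten and steered via the ALB client
 * table so that each client stays pinned to one slave; all other packets
 * go out through the TLB policy. ARP table update packets generated here
 * are sent from separately allocated mbufs and are not counted in the
 * returned packet total.
 */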
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to
         * send through tlb. In the worst case every packet will be sent on
         * one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate per-slave mbuf arrays with the packets to be sent */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

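/*
 * Mode 4 transmit without dedicated queues: slow (LACP) packets dequeued
 * from each slave's tx_ring are placed at the front of that slave's
 * buffer so they go out first; data packets are then hashed across the
 * slaves that are currently distributing. Slow packets are excluded from
 * the returned packet count.
 */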
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate extra room per slave for slow (LACP) packets in 802.3AD mode */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total number of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Number of slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
                                NULL);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate per-slave mbuf arrays with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Use only slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails drop slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

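/*
 * Broadcast transmit: every packet is sent on every active slave, so the
 * mbuf reference count is raised by (slave count - 1) up front. The
 * caller only sees one logical copy, so on partial failure all copies
 * except those of the most successful slave are freed here.
 */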
1340 static uint16_t
1341 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1342                 uint16_t nb_pkts)
1343 {
1344         struct bond_dev_private *internals;
1345         struct bond_tx_queue *bd_tx_q;
1346
1347         uint8_t tx_failed_flag = 0, num_of_slaves;
1348         uint16_t slaves[RTE_MAX_ETHPORTS];
1349
1350         uint16_t max_nb_of_tx_pkts = 0;
1351
1352         int slave_tx_total[RTE_MAX_ETHPORTS];
1353         int i, most_successful_tx_slave = -1;
1354
1355         bd_tx_q = (struct bond_tx_queue *)queue;
1356         internals = bd_tx_q->dev_private;
1357
1358         /* Copy slave list to protect against slave up/down changes during tx
1359          * bursting */
1360         num_of_slaves = internals->active_slave_count;
1361         memcpy(slaves, internals->active_slaves,
1362                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1363
1364         if (num_of_slaves < 1)
1365                 return 0;
1366
1367         /* Increment reference count on mbufs */
1368         for (i = 0; i < nb_pkts; i++)
1369                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1370
1371         /* Transmit burst on each active slave */
1372         for (i = 0; i < num_of_slaves; i++) {
1373                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1374                                         bufs, nb_pkts);
1375
1376                 if (unlikely(slave_tx_total[i] < nb_pkts))
1377                         tx_failed_flag = 1;
1378
1379                 /* record the value and slave index for the slave which transmits the
1380                  * maximum number of packets */
1381                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1382                         max_nb_of_tx_pkts = slave_tx_total[i];
1383                         most_successful_tx_slave = i;
1384                 }
1385         }
1386
1387         /* If slaves fail to transmit packets from the burst, the calling
1388          * application is not expected to know about multiple references to the
1389          * packets, so we must free the failed packets of every slave except the
1390          * most successful one
1391          */
1391         if (unlikely(tx_failed_flag))
1392                 for (i = 0; i < num_of_slaves; i++)
1393                         if (i != most_successful_tx_slave)
1394                                 while (slave_tx_total[i] < nb_pkts)
1395                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1396
1397         return max_nb_of_tx_pkts;
1398 }
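
/*
 * Editor's note, with a sketch: the refcount bump above exists because every
 * one of the N slave PMDs will eventually call rte_pktmbuf_free() on the
 * same mbuf. Starting from a refcount of 1, surviving N frees requires a
 * refcount of N, hence the update by (N - 1). Hypothetical illustration:
 */
#if 0
static void
example_broadcast_refcnt(struct rte_mbuf *m, uint8_t num_slaves)
{
        rte_mbuf_refcnt_update(m, num_slaves - 1); /* refcnt becomes N */

        /* Each slave transmit path then frees the mbuf exactly once;
         * the buffer returns to its pool only after the last free. */
}
#endif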
1399
1400 void
1401 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1402 {
1403         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1404
1405         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1406                 /**
1407                  * If in mode 4 then save the link properties of the first
1408                  * slave; all subsequent slaves must match these properties
1409                  */
1410                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1411
1412                 bond_link->link_autoneg = slave_link->link_autoneg;
1413                 bond_link->link_duplex = slave_link->link_duplex;
1414                 bond_link->link_speed = slave_link->link_speed;
1415         } else {
1416                 /**
1417                  * In any other mode the link properties are set to default
1418                  * values of AUTONEG/DUPLEX
1419                  */
1420                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1421                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1422         }
1423 }
1424
1425 int
1426 link_properties_valid(struct rte_eth_dev *ethdev,
1427                 struct rte_eth_link *slave_link)
1428 {
1429         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1430
1431         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1432                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1433
1434                 if (bond_link->link_duplex != slave_link->link_duplex ||
1435                         bond_link->link_autoneg != slave_link->link_autoneg ||
1436                         bond_link->link_speed != slave_link->link_speed)
1437                         return -1;
1438         }
1439
1440         return 0;
1441 }
1442
1443 int
1444 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1445 {
1446         struct ether_addr *mac_addr;
1447
1448         if (eth_dev == NULL) {
1449                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1450                 return -1;
1451         }
1452
1453         if (dst_mac_addr == NULL) {
1454                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1455                 return -1;
1456         }
1457
1458         mac_addr = eth_dev->data->mac_addrs;
1459
1460         ether_addr_copy(mac_addr, dst_mac_addr);
1461         return 0;
1462 }
1463
1464 int
1465 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1466 {
1467         struct ether_addr *mac_addr;
1468
1469         if (eth_dev == NULL) {
1470                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1471                 return -1;
1472         }
1473
1474         if (new_mac_addr == NULL) {
1475                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1476                 return -1;
1477         }
1478
1479         mac_addr = eth_dev->data->mac_addrs;
1480
1481         /* If the new MAC is different from the current MAC then update */
1482         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1483                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1484
1485         return 0;
1486 }
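
/*
 * Editor's sketch: the application-facing way to reach mac_address_set() is
 * the public bonding MAC API, which also marks the address as user defined
 * so that slave MAC updates preserve it. The address below is an assumption
 * chosen for the example.
 */
#if 0
static int
example_set_bond_mac(uint16_t bond_port)
{
        /* Locally administered unicast address */
        struct ether_addr addr = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
        };

        return rte_eth_bond_mac_address_set(bond_port, &addr);
}
#endif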
1487
1488 int
1489 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1490 {
1491         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1492         int i;
1493
1494         /* Update slave devices MAC addresses */
1495         if (internals->slave_count < 1)
1496                 return -1;
1497
1498         switch (internals->mode) {
1499         case BONDING_MODE_ROUND_ROBIN:
1500         case BONDING_MODE_BALANCE:
1501         case BONDING_MODE_BROADCAST:
1502                 for (i = 0; i < internals->slave_count; i++) {
1503                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1504                                         bonded_eth_dev->data->mac_addrs)) {
1505                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1506                                                 internals->slaves[i].port_id);
1507                                 return -1;
1508                         }
1509                 }
1510                 break;
1511         case BONDING_MODE_8023AD:
1512                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1513                 break;
1514         case BONDING_MODE_ACTIVE_BACKUP:
1515         case BONDING_MODE_TLB:
1516         case BONDING_MODE_ALB:
1517         default:
1518                 for (i = 0; i < internals->slave_count; i++) {
1519                         if (internals->slaves[i].port_id ==
1520                                         internals->current_primary_port) {
1521                                 if (mac_address_set(&rte_eth_devices[internals->current_primary_port],
1522                                                 bonded_eth_dev->data->mac_addrs)) {
1523                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1524                                                         internals->current_primary_port);
1525                                         return -1;
1526                                 }
1527                         } else {
1528                                 if (mac_address_set(
1529                                                 &rte_eth_devices[internals->slaves[i].port_id],
1530                                                 &internals->slaves[i].persisted_mac_addr)) {
1531                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1532                                                         internals->slaves[i].port_id);
1533                                         return -1;
1534                                 }
1535                         }
1536                 }
1537         }
1538
1539         return 0;
1540 }
1541
1542 int
1543 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1544 {
1545         struct bond_dev_private *internals;
1546
1547         internals = eth_dev->data->dev_private;
1548
1549         switch (mode) {
1550         case BONDING_MODE_ROUND_ROBIN:
1551                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1552                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1553                 break;
1554         case BONDING_MODE_ACTIVE_BACKUP:
1555                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1556                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1557                 break;
1558         case BONDING_MODE_BALANCE:
1559                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1560                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1561                 break;
1562         case BONDING_MODE_BROADCAST:
1563                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1564                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1565                 break;
1566         case BONDING_MODE_8023AD:
1567                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1568                         return -1;
1569
1570                 if (internals->mode4.dedicated_queues.enabled == 0) {
1571                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1572                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1573                         RTE_LOG(WARNING, PMD,
1574                                 "Mode 4 requires the application to call the TX "
1575                                 "and RX burst functions at least every 100ms.\n");
1576                 } else {
1577                         /* Use flow director's optimization */
1578                         eth_dev->rx_pkt_burst =
1579                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1580                         eth_dev->tx_pkt_burst =
1581                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1582                 }
1583                 break;
1584         case BONDING_MODE_TLB:
1585                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1586                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1587                 break;
1588         case BONDING_MODE_ALB:
1589                 if (bond_mode_alb_enable(eth_dev) != 0)
1590                         return -1;
1591
1592                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1593                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1594                 break;
1595         default:
1596                 return -1;
1597         }
1598
1599         internals->mode = mode;
1600
1601         return 0;
1602 }
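
/*
 * Editor's sketch: how an application normally reaches
 * bond_ethdev_mode_set(), via the public bonding API. The device name and
 * socket id are assumptions for the example.
 */
#if 0
static int
example_create_bonded_port(void)
{
        int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD,
                        0 /* socket id */);
        if (port < 0)
                return port;

        /* The mode can also be changed later; this call lands in
         * bond_ethdev_mode_set() above. */
        return rte_eth_bond_mode_set((uint16_t)port, BONDING_MODE_BALANCE);
}
#endif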
1603
1604
1605 static int
1606 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1607                 struct rte_eth_dev *slave_eth_dev)
1608 {
1609         int errval = 0;
1610         struct bond_dev_private *internals = (struct bond_dev_private *)
1611                 bonded_eth_dev->data->dev_private;
1612         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1613
1614         if (port->slow_pool == NULL) {
1615                 char mem_name[256];
1616                 int slave_id = slave_eth_dev->data->port_id;
1617
1618                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1619                                 slave_id);
1620                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1621                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1622                         slave_eth_dev->data->numa_node);
1623
1624                 /* Any memory allocation failure during initialization is critical
1625                  * because resources can't be freed, so reinitialization is impossible. */
1626                 if (port->slow_pool == NULL) {
1627                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1628                                 slave_id, mem_name, rte_strerror(rte_errno));
1629                 }
1630         }
1631
1632         if (internals->mode4.dedicated_queues.enabled == 1) {
1633                 /* Configure slow Rx queue */
1634
1635                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1636                                 internals->mode4.dedicated_queues.rx_qid, 128,
1637                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1638                                 NULL, port->slow_pool);
1639                 if (errval != 0) {
1640                         RTE_BOND_LOG(ERR,
1641                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1642                                         slave_eth_dev->data->port_id,
1643                                         internals->mode4.dedicated_queues.rx_qid,
1644                                         errval);
1645                         return errval;
1646                 }
1647
1648                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1649                                 internals->mode4.dedicated_queues.tx_qid, 512,
1650                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1651                                 NULL);
1652                 if (errval != 0) {
1653                         RTE_BOND_LOG(ERR,
1654                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1655                                 slave_eth_dev->data->port_id,
1656                                 internals->mode4.dedicated_queues.tx_qid,
1657                                 errval);
1658                         return errval;
1659                 }
1660         }
1661         return 0;
1662 }
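
/*
 * Editor's sketch: the dedicated_queues branch above is taken only after the
 * application has opted in through the 802.3ad API, while the bonded device
 * is stopped. The port id is a hypothetical value.
 */
#if 0
static int
example_enable_dedicated_queues(uint16_t bond_port)
{
        /* Reserves one extra rx and tx queue per slave for LACP control
         * traffic, steered by a flow rule instead of software demux. */
        return rte_eth_bond_8023ad_dedicated_queues_enable(bond_port);
}
#endif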
1663
1664 int
1665 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1666                 struct rte_eth_dev *slave_eth_dev)
1667 {
1668         struct bond_rx_queue *bd_rx_q;
1669         struct bond_tx_queue *bd_tx_q;
1670         uint16_t nb_rx_queues;
1671         uint16_t nb_tx_queues;
1672
1673         int errval;
1674         uint16_t q_id;
1675         struct rte_flow_error flow_error;
1676
1677         struct bond_dev_private *internals = (struct bond_dev_private *)
1678                 bonded_eth_dev->data->dev_private;
1679
1680         /* Stop slave */
1681         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1682
1683         /* Enable interrupts on slave device if supported */
1684         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1685                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1686
1687         /* If RSS is enabled for bonding, try to enable it for slaves  */
1688         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1689                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1690                                 != 0) {
1691                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1692                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1693                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1694                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1695                 } else {
1696                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1697                 }
1698
1699                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1700                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1701                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1702                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1703         }
1704
1705         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1706                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1707
1708         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1709         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1710
1711         if (internals->mode == BONDING_MODE_8023AD) {
1712                 if (internals->mode4.dedicated_queues.enabled == 1) {
1713                         nb_rx_queues++;
1714                         nb_tx_queues++;
1715                 }
1716         }
1717
1718         /* Configure device */
1719         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1720                         nb_rx_queues, nb_tx_queues,
1721                         &(slave_eth_dev->data->dev_conf));
1722         if (errval != 0) {
1723                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1724                                 slave_eth_dev->data->port_id, errval);
1725                 return errval;
1726         }
1727
1728         /* Setup Rx Queues */
1729         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1730                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1731
1732                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1733                                 bd_rx_q->nb_rx_desc,
1734                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1735                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1736                 if (errval != 0) {
1737                         RTE_BOND_LOG(ERR,
1738                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1739                                         slave_eth_dev->data->port_id, q_id, errval);
1740                         return errval;
1741                 }
1742         }
1743
1744         /* Setup Tx Queues */
1745         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1746                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1747
1748                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1749                                 bd_tx_q->nb_tx_desc,
1750                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1751                                 &bd_tx_q->tx_conf);
1752                 if (errval != 0) {
1753                         RTE_BOND_LOG(ERR,
1754                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1755                                 slave_eth_dev->data->port_id, q_id, errval);
1756                         return errval;
1757                 }
1758         }
1759
1760         if (internals->mode == BONDING_MODE_8023AD &&
1761                         internals->mode4.dedicated_queues.enabled == 1) {
1762                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1763                 if (errval != 0)
1764                         return errval;
1765
1766                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1767                                 slave_eth_dev->data->port_id) != 0) {
1768                         RTE_BOND_LOG(ERR,
1769                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1770                                 slave_eth_dev->data->port_id);
1771                         return -1;
1772                 }
1773
1774                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1775                         rte_flow_destroy(slave_eth_dev->data->port_id,
1776                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1777                                         &flow_error);
1778
1779                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1780                                 slave_eth_dev->data->port_id);
1781         }
1782
1783         /* Start device */
1784         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1785         if (errval != 0) {
1786                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1787                                 slave_eth_dev->data->port_id, errval);
1788                 return -1;
1789         }
1790
1791         /* If RSS is enabled for bonding, synchronize RETA */
1792         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1793                 int i;
1794                 struct bond_dev_private *internals;
1795
1796                 internals = bonded_eth_dev->data->dev_private;
1797
1798                 for (i = 0; i < internals->slave_count; i++) {
1799                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1800                                 errval = rte_eth_dev_rss_reta_update(
1801                                                 slave_eth_dev->data->port_id,
1802                                                 &internals->reta_conf[0],
1803                                                 internals->slaves[i].reta_size);
1804                                 if (errval != 0) {
1805                                         RTE_LOG(WARNING, PMD,
1806                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1807                                                         " RSS Configuration for bonding may be inconsistent.\n",
1808                                                         slave_eth_dev->data->port_id, errval);
1809                                 }
1810                                 break;
1811                         }
1812                 }
1813         }
1814
1815         /* If lsc interrupt is set, check initial slave's link status */
1816         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1817                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1818                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1819                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1820                         NULL);
1821         }
1822
1823         return 0;
1824 }
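
/*
 * Editor's sketch: slave_configure() runs for every slave when the bonded
 * port is (re)started; slaves are attached beforehand with the public API.
 * Port ids here are assumptions.
 */
#if 0
static int
example_add_slaves_and_start(uint16_t bond_port,
                const uint16_t *slave_ports, int num_slaves)
{
        int i;

        for (i = 0; i < num_slaves; i++)
                if (rte_eth_bond_slave_add(bond_port, slave_ports[i]) != 0)
                        return -1;

        /* Starting the bonded port reconfigures and starts each slave
         * through slave_configure() above. */
        return rte_eth_dev_start(bond_port);
}
#endif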
1825
1826 void
1827 slave_remove(struct bond_dev_private *internals,
1828                 struct rte_eth_dev *slave_eth_dev)
1829 {
1830         uint8_t i;
1831
1832         for (i = 0; i < internals->slave_count; i++)
1833                 if (internals->slaves[i].port_id ==
1834                                 slave_eth_dev->data->port_id)
1835                         break;
1836
1837         if (i < (internals->slave_count - 1))
1838                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1839                                 sizeof(internals->slaves[0]) *
1840                                 (internals->slave_count - i - 1));
1841
1842         internals->slave_count--;
1843
1844         /* force reconfiguration of slave interfaces */
1845         _rte_eth_dev_reset(slave_eth_dev);
1846 }
1847
1848 static void
1849 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1850
1851 void
1852 slave_add(struct bond_dev_private *internals,
1853                 struct rte_eth_dev *slave_eth_dev)
1854 {
1855         struct bond_slave_details *slave_details =
1856                         &internals->slaves[internals->slave_count];
1857
1858         slave_details->port_id = slave_eth_dev->data->port_id;
1859         slave_details->last_link_status = 0;
1860
1861         /* Mark slave devices that don't support interrupts so we can
1862          * compensate when we start the bond
1863          */
1864         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1865                 slave_details->link_status_poll_enabled = 1;
1866         }
1867
1868         slave_details->link_status_wait_to_complete = 0;
1869         /* Save the slave's current MAC so it can be restored when removed */
1870         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1871                         sizeof(struct ether_addr));
1872 }
1873
1874 void
1875 bond_ethdev_primary_set(struct bond_dev_private *internals,
1876                 uint16_t slave_port_id)
1877 {
1878         int i;
1879
1880         if (internals->active_slave_count < 1)
1881                 internals->current_primary_port = slave_port_id;
1882         else
1883                 /* Search bonded device slave ports for new proposed primary port */
1884                 for (i = 0; i < internals->active_slave_count; i++) {
1885                         if (internals->active_slaves[i] == slave_port_id)
1886                                 internals->current_primary_port = slave_port_id;
1887                 }
1888 }
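
/*
 * Editor's sketch: applications choose the primary slave (used by
 * active-backup, TLB and ALB) through the public API, which validates the
 * port ids before this helper runs. Hypothetical ids.
 */
#if 0
static int
example_select_primary(uint16_t bond_port, uint16_t slave_port)
{
        if (rte_eth_bond_primary_set(bond_port, slave_port) != 0)
                return -1;

        /* Read back the currently selected primary slave */
        return rte_eth_bond_primary_get(bond_port);
}
#endif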
1889
1890 static void
1891 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1892
1893 static int
1894 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1895 {
1896         struct bond_dev_private *internals;
1897         int i;
1898
1899         /* slave eth dev will be started by bonded device */
1900         if (check_for_bonded_ethdev(eth_dev)) {
1901                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1902                                 eth_dev->data->port_id);
1903                 return -1;
1904         }
1905
1906         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1907         eth_dev->data->dev_started = 1;
1908
1909         internals = eth_dev->data->dev_private;
1910
1911         if (internals->slave_count == 0) {
1912                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1913                 return -1;
1914         }
1915
1916         if (internals->user_defined_mac == 0) {
1917                 struct ether_addr *new_mac_addr = NULL;
1918
1919                 for (i = 0; i < internals->slave_count; i++)
1920                         if (internals->slaves[i].port_id == internals->primary_port)
1921                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1922
1923                 if (new_mac_addr == NULL)
1924                         return -1;
1925
1926                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1927                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1928                                         eth_dev->data->port_id);
1929                         return -1;
1930                 }
1931         }
1932
1933         /* Update all slave devices' MAC addresses */
1934         if (mac_address_slaves_update(eth_dev) != 0)
1935                 return -1;
1936
1937         /* If the bonded device is configured in promiscuous mode then re-apply it */
1938         if (internals->promiscuous_en)
1939                 bond_ethdev_promiscuous_enable(eth_dev);
1940
1941         if (internals->mode == BONDING_MODE_8023AD) {
1942                 if (internals->mode4.dedicated_queues.enabled == 1) {
1943                         internals->mode4.dedicated_queues.rx_qid =
1944                                         eth_dev->data->nb_rx_queues;
1945                         internals->mode4.dedicated_queues.tx_qid =
1946                                         eth_dev->data->nb_tx_queues;
1947                 }
1948         }
1949
1950
1951         /* Reconfigure each slave device if starting bonded device */
1952         for (i = 0; i < internals->slave_count; i++) {
1953                 struct rte_eth_dev *slave_ethdev =
1954                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1955                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1956                         RTE_BOND_LOG(ERR,
1957                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1958                                 eth_dev->data->port_id,
1959                                 internals->slaves[i].port_id);
1960                         return -1;
1961                 }
1962                 /* We will need to poll for link status if any slave doesn't
1963                  * support interrupts
1964                  */
1965                 if (internals->slaves[i].link_status_poll_enabled)
1966                         internals->link_status_polling_enabled = 1;
1967         }
1968         /* start polling if needed */
1969         if (internals->link_status_polling_enabled) {
1970                 rte_eal_alarm_set(
1971                         internals->link_status_polling_interval_ms * 1000,
1972                         bond_ethdev_slave_link_status_change_monitor,
1973                         (void *)&rte_eth_devices[internals->port_id]);
1974         }
1975
1976         if (internals->user_defined_primary_port)
1977                 bond_ethdev_primary_set(internals, internals->primary_port);
1978
1979         if (internals->mode == BONDING_MODE_8023AD)
1980                 bond_mode_8023ad_start(eth_dev);
1981
1982         if (internals->mode == BONDING_MODE_TLB ||
1983                         internals->mode == BONDING_MODE_ALB)
1984                 bond_tlb_enable(internals);
1985
1986         return 0;
1987 }
1988
1989 static void
1990 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1991 {
1992         uint8_t i;
1993
1994         if (dev->data->rx_queues != NULL) {
1995                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1996                         rte_free(dev->data->rx_queues[i]);
1997                         dev->data->rx_queues[i] = NULL;
1998                 }
1999                 dev->data->nb_rx_queues = 0;
2000         }
2001
2002         if (dev->data->tx_queues != NULL) {
2003                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2004                         rte_free(dev->data->tx_queues[i]);
2005                         dev->data->tx_queues[i] = NULL;
2006                 }
2007                 dev->data->nb_tx_queues = 0;
2008         }
2009 }
2010
2011 void
2012 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2013 {
2014         struct bond_dev_private *internals = eth_dev->data->dev_private;
2015         uint8_t i;
2016
2017         if (internals->mode == BONDING_MODE_8023AD) {
2018                 struct port *port;
2019                 void *pkt = NULL;
2020
2021                 bond_mode_8023ad_stop(eth_dev);
2022
2023                 /* Discard all messages to/from mode 4 state machines */
2024                 for (i = 0; i < internals->active_slave_count; i++) {
2025                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2026
2027                         RTE_ASSERT(port->rx_ring != NULL);
2028                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2029                                 rte_pktmbuf_free(pkt);
2030
2031                         RTE_ASSERT(port->tx_ring != NULL);
2032                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2033                                 rte_pktmbuf_free(pkt);
2034                 }
2035         }
2036
2037         if (internals->mode == BONDING_MODE_TLB ||
2038                         internals->mode == BONDING_MODE_ALB) {
2039                 bond_tlb_disable(internals);
2040                 for (i = 0; i < internals->active_slave_count; i++)
2041                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2042         }
2043
2044         internals->active_slave_count = 0;
2045         internals->link_status_polling_enabled = 0;
2046         for (i = 0; i < internals->slave_count; i++)
2047                 internals->slaves[i].last_link_status = 0;
2048
2049         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2050         eth_dev->data->dev_started = 0;
2051 }
2052
2053 void
2054 bond_ethdev_close(struct rte_eth_dev *dev)
2055 {
2056         struct bond_dev_private *internals = dev->data->dev_private;
2057         uint8_t bond_port_id = internals->port_id;
2058         int skipped = 0;
2059
2060         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2061         while (internals->slave_count != skipped) {
2062                 uint16_t port_id = internals->slaves[skipped].port_id;
2063
2064                 rte_eth_dev_stop(port_id);
2065
2066                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2067                         RTE_LOG(ERR, PMD,
2068                                 "Failed to remove port %d from bonded device "
2069                                 "%s\n", port_id, dev->device->name);
2070                         skipped++;
2071                 }
2072         }
2073         bond_ethdev_free_queues(dev);
2074         rte_bitmap_reset(internals->vlan_filter_bmp);
2075 }
2076
2077 /* forward declaration */
2078 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2079
2080 static void
2081 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2082 {
2083         struct bond_dev_private *internals = dev->data->dev_private;
2084
2085         uint16_t max_nb_rx_queues = UINT16_MAX;
2086         uint16_t max_nb_tx_queues = UINT16_MAX;
2087
2088         dev_info->max_mac_addrs = 1;
2089
2090         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2091                         internals->candidate_max_rx_pktlen :
2092                         ETHER_MAX_JUMBO_FRAME_LEN;
2093
2094         /* The max number of tx/rx queues that the bonded device can support is
2095          * the minimum value across all bonded slaves, as every slave must be
2096          * capable of supporting the same number of tx/rx queues.
2097          */
2098         if (internals->slave_count > 0) {
2099                 struct rte_eth_dev_info slave_info;
2100                 uint8_t idx;
2101
2102                 for (idx = 0; idx < internals->slave_count; idx++) {
2103                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2104                                         &slave_info);
2105
2106                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2107                                 max_nb_rx_queues = slave_info.max_rx_queues;
2108
2109                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2110                                 max_nb_tx_queues = slave_info.max_tx_queues;
2111                 }
2112         }
2113
2114         dev_info->max_rx_queues = max_nb_rx_queues;
2115         dev_info->max_tx_queues = max_nb_tx_queues;
2116
2117         /**
2118          * If dedicated hw queues are enabled for the link bonding device in LACP
2119          * mode then we need to reduce the maximum number of data path queues by 1.
2120          */
2121         if (internals->mode == BONDING_MODE_8023AD &&
2122                 internals->mode4.dedicated_queues.enabled == 1) {
2123                 dev_info->max_rx_queues--;
2124                 dev_info->max_tx_queues--;
2125         }
2126
2127         dev_info->min_rx_bufsize = 0;
2128
2129         dev_info->rx_offload_capa = internals->rx_offload_capa;
2130         dev_info->tx_offload_capa = internals->tx_offload_capa;
2131         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2132
2133         dev_info->reta_size = internals->reta_size;
2134 }
2135
2136 static int
2137 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2138 {
2139         int res;
2140         uint16_t i;
2141         struct bond_dev_private *internals = dev->data->dev_private;
2142
2143         /* don't do this while a slave is being added */
2144         rte_spinlock_lock(&internals->lock);
2145
2146         if (on)
2147                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2148         else
2149                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2150
2151         for (i = 0; i < internals->slave_count; i++) {
2152                 uint16_t port_id = internals->slaves[i].port_id;
2153
2154                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2155                 if (res == -ENOTSUP)
2156                         RTE_LOG(WARNING, PMD,
2157                                 "Setting VLAN filter on slave port %u not supported.\n",
2158                                 port_id);
2159         }
2160
2161         rte_spinlock_unlock(&internals->lock);
2162         return 0;
2163 }
2164
2165 static int
2166 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2167                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2168                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2169 {
2170         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2171                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2172                                         0, dev->data->numa_node);
2173         if (bd_rx_q == NULL)
2174                 return -1;
2175
2176         bd_rx_q->queue_id = rx_queue_id;
2177         bd_rx_q->dev_private = dev->data->dev_private;
2178
2179         bd_rx_q->nb_rx_desc = nb_rx_desc;
2180
2181         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2182         bd_rx_q->mb_pool = mb_pool;
2183
2184         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2185
2186         return 0;
2187 }
2188
2189 static int
2190 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2191                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2192                 const struct rte_eth_txconf *tx_conf)
2193 {
2194         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2195                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2196                                         0, dev->data->numa_node);
2197
2198         if (bd_tx_q == NULL)
2199                 return -1;
2200
2201         bd_tx_q->queue_id = tx_queue_id;
2202         bd_tx_q->dev_private = dev->data->dev_private;
2203
2204         bd_tx_q->nb_tx_desc = nb_tx_desc;
2205         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2206
2207         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2208
2209         return 0;
2210 }
2211
2212 static void
2213 bond_ethdev_rx_queue_release(void *queue)
2214 {
2215         if (queue == NULL)
2216                 return;
2217
2218         rte_free(queue);
2219 }
2220
2221 static void
2222 bond_ethdev_tx_queue_release(void *queue)
2223 {
2224         if (queue == NULL)
2225                 return;
2226
2227         rte_free(queue);
2228 }
2229
2230 static void
2231 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2232 {
2233         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2234         struct bond_dev_private *internals;
2235
2236         /* Default value for polling slave found is true as we don't want to
2237          * disable the polling thread if we cannot get the lock */
2238         int i, polling_slave_found = 1;
2239
2240         if (cb_arg == NULL)
2241                 return;
2242
2243         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2244         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2245
2246         if (!bonded_ethdev->data->dev_started ||
2247                 !internals->link_status_polling_enabled)
2248                 return;
2249
2250         /* If the device is currently being configured then don't check slave
2251          * link status; wait until the next period */
2252         if (rte_spinlock_trylock(&internals->lock)) {
2253                 if (internals->slave_count > 0)
2254                         polling_slave_found = 0;
2255
2256                 for (i = 0; i < internals->slave_count; i++) {
2257                         if (!internals->slaves[i].link_status_poll_enabled)
2258                                 continue;
2259
2260                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2261                         polling_slave_found = 1;
2262
2263                         /* Update slave link status */
2264                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2265                                         internals->slaves[i].link_status_wait_to_complete);
2266
2267                         /* if link status has changed since last checked then call lsc
2268                          * event callback */
2269                         if (slave_ethdev->data->dev_link.link_status !=
2270                                         internals->slaves[i].last_link_status) {
2271                                 internals->slaves[i].last_link_status =
2272                                                 slave_ethdev->data->dev_link.link_status;
2273
2274                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2275                                                 RTE_ETH_EVENT_INTR_LSC,
2276                                                 &bonded_ethdev->data->port_id,
2277                                                 NULL);
2278                         }
2279                 }
2280                 rte_spinlock_unlock(&internals->lock);
2281         }
2282
2283         if (polling_slave_found)
2284                 /* Set alarm to continue monitoring link status of slave ethdev's */
2285                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2286                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2287 }
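
/*
 * Editor's sketch of the self-re-arming alarm pattern used by the monitor
 * above: the callback does its work and, while its condition holds,
 * schedules itself again with rte_eal_alarm_set(). Names and the interval
 * are assumptions.
 */
#if 0
#define EXAMPLE_POLL_US (10 * 1000) /* 10ms, the default polling interval */

static void
example_poll_cb(void *arg)
{
        int *keep_running = arg;

        /* ... poll some state here ... */

        if (*keep_running)
                rte_eal_alarm_set(EXAMPLE_POLL_US, example_poll_cb, arg);
}
#endif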
2288
2289 static int
2290 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2291 {
2292         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2293
2294         struct bond_dev_private *bond_ctx;
2295         struct rte_eth_link slave_link;
2296
2297         uint32_t idx;
2298
2299         bond_ctx = ethdev->data->dev_private;
2300
2301         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2302
2303         if (ethdev->data->dev_started == 0 ||
2304                         bond_ctx->active_slave_count == 0) {
2305                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2306                 return 0;
2307         }
2308
2309         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2310
2311         if (wait_to_complete)
2312                 link_update = rte_eth_link_get;
2313         else
2314                 link_update = rte_eth_link_get_nowait;
2315
2316         switch (bond_ctx->mode) {
2317         case BONDING_MODE_BROADCAST:
2318                 /**
2319                  * Setting link speed to UINT32_MAX to ensure we pick up the
2320                  * value of the first active slave
2321                  */
2322                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2323
2324                 /**
2325                  * The bonded link speed is the minimum of all the slaves' link
2326                  * speeds, as packet loss will occur on a slave if transmission
2327                  * is attempted at a rate greater than its own
2328                  */
2329                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2330                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2331
2332                         if (slave_link.link_speed <
2333                                         ethdev->data->dev_link.link_speed)
2334                                 ethdev->data->dev_link.link_speed =
2335                                                 slave_link.link_speed;
2336                 }
2337                 break;
2338         case BONDING_MODE_ACTIVE_BACKUP:
2339                 /* Current primary slave */
2340                 link_update(bond_ctx->current_primary_port, &slave_link);
2341
2342                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2343                 break;
2344         case BONDING_MODE_8023AD:
2345                 ethdev->data->dev_link.link_autoneg =
2346                                 bond_ctx->mode4.slave_link.link_autoneg;
2347                 ethdev->data->dev_link.link_duplex =
2348                                 bond_ctx->mode4.slave_link.link_duplex;
2349                 /* fall through to update link speed */
2350         case BONDING_MODE_ROUND_ROBIN:
2351         case BONDING_MODE_BALANCE:
2352         case BONDING_MODE_TLB:
2353         case BONDING_MODE_ALB:
2354         default:
2355                 /**
2356                  * In these modes the maximum theoretical link speed is the sum
2357                  * of all the slaves' link speeds
2358                  */
2359                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2360
2361                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2362                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2363
2364                         ethdev->data->dev_link.link_speed +=
2365                                         slave_link.link_speed;
2366                 }
2367         }
2368
2369
2370         return 0;
2371 }
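
/*
 * Editor's note, with a sketch: the aggregate computed above is visible to
 * applications through the standard link query; e.g. two active 10G slaves
 * in balance mode report 20G on the bonded port. Hypothetical helper:
 */
#if 0
static uint32_t
example_bond_speed(uint16_t bond_port)
{
        struct rte_eth_link link;

        rte_eth_link_get_nowait(bond_port, &link);
        return link.link_speed; /* ETH_SPEED_NUM_* units, i.e. Mbps */
}
#endif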
2372
2373
2374 static int
2375 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2376 {
2377         struct bond_dev_private *internals = dev->data->dev_private;
2378         struct rte_eth_stats slave_stats;
2379         int i, j;
2380
2381         for (i = 0; i < internals->slave_count; i++) {
2382                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2383
2384                 stats->ipackets += slave_stats.ipackets;
2385                 stats->opackets += slave_stats.opackets;
2386                 stats->ibytes += slave_stats.ibytes;
2387                 stats->obytes += slave_stats.obytes;
2388                 stats->imissed += slave_stats.imissed;
2389                 stats->ierrors += slave_stats.ierrors;
2390                 stats->oerrors += slave_stats.oerrors;
2391                 stats->rx_nombuf += slave_stats.rx_nombuf;
2392
2393                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2394                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2395                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2396                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2397                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2398                         stats->q_errors[j] += slave_stats.q_errors[j];
2399                 }
2400
2401         }
2402
2403         return 0;
2404 }
2405
2406 static void
2407 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2408 {
2409         struct bond_dev_private *internals = dev->data->dev_private;
2410         int i;
2411
2412         for (i = 0; i < internals->slave_count; i++)
2413                 rte_eth_stats_reset(internals->slaves[i].port_id);
2414 }
2415
2416 static void
2417 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2418 {
2419         struct bond_dev_private *internals = eth_dev->data->dev_private;
2420         int i;
2421
2422         internals->promiscuous_en = 1;
2423
2424         switch (internals->mode) {
2425         /* Promiscuous mode is propagated to all slaves */
2426         case BONDING_MODE_ROUND_ROBIN:
2427         case BONDING_MODE_BALANCE:
2428         case BONDING_MODE_BROADCAST:
2429                 for (i = 0; i < internals->slave_count; i++)
2430                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2431                 break;
2432         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2433         case BONDING_MODE_8023AD:
2434                 break;
2435         /* Promiscuous mode is propagated only to primary slave */
2436         case BONDING_MODE_ACTIVE_BACKUP:
2437         case BONDING_MODE_TLB:
2438         case BONDING_MODE_ALB:
2439         default:
2440                 rte_eth_promiscuous_enable(internals->current_primary_port);
2441         }
2442 }
2443
2444 static void
2445 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2446 {
2447         struct bond_dev_private *internals = dev->data->dev_private;
2448         int i;
2449
2450         internals->promiscuous_en = 0;
2451
2452         switch (internals->mode) {
2453         /* Promiscuous mode is propagated to all slaves */
2454         case BONDING_MODE_ROUND_ROBIN:
2455         case BONDING_MODE_BALANCE:
2456         case BONDING_MODE_BROADCAST:
2457                 for (i = 0; i < internals->slave_count; i++)
2458                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2459                 break;
2460         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2461         case BONDING_MODE_8023AD:
2462                 break;
2463         /* Promiscuous mode is propagated only to primary slave */
2464         case BONDING_MODE_ACTIVE_BACKUP:
2465         case BONDING_MODE_TLB:
2466         case BONDING_MODE_ALB:
2467         default:
2468                 rte_eth_promiscuous_disable(internals->current_primary_port);
2469         }
2470 }
2471
2472 static void
2473 bond_ethdev_delayed_lsc_propagation(void *arg)
2474 {
2475         if (arg == NULL)
2476                 return;
2477
2478         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2479                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2480 }
2481
2482 int
2483 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2484                 void *param, void *ret_param __rte_unused)
2485 {
2486         struct rte_eth_dev *bonded_eth_dev;
2487         struct bond_dev_private *internals;
2488         struct rte_eth_link link;
2489         int rc = -1;
2490
2491         int i, valid_slave = 0;
2492         uint8_t active_pos;
2493         uint8_t lsc_flag = 0;
2494
2495         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2496                 return rc;
2497
2498         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2499
2500         if (check_for_bonded_ethdev(bonded_eth_dev))
2501                 return rc;
2502
2503         internals = bonded_eth_dev->data->dev_private;
2504
2505         /* If the device isn't started don't handle interrupts */
2506         if (!bonded_eth_dev->data->dev_started)
2507                 return rc;
2508
2509         /* verify that port_id is a valid slave of bonded port */
2510         for (i = 0; i < internals->slave_count; i++) {
2511                 if (internals->slaves[i].port_id == port_id) {
2512                         valid_slave = 1;
2513                         break;
2514                 }
2515         }
2516
2517         if (!valid_slave)
2518                 return rc;
2519
2520         /* Search for port in active port list */
2521         active_pos = find_slave_by_id(internals->active_slaves,
2522                         internals->active_slave_count, port_id);
2523
2524         rte_eth_link_get_nowait(port_id, &link);
2525         if (link.link_status) {
2526                 if (active_pos < internals->active_slave_count)
2527                         return rc;
2528
2529                 /* if no active slave ports then set this port to be primary port */
2530                 if (internals->active_slave_count < 1) {
2531                         /* If first active slave, then change link status */
2532                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2533                         internals->current_primary_port = port_id;
2534                         lsc_flag = 1;
2535
2536                         mac_address_slaves_update(bonded_eth_dev);
2537                 }
2538
2539                 activate_slave(bonded_eth_dev, port_id);
2540
2541                 /* If user has defined the primary port then default to using it */
2542                 if (internals->user_defined_primary_port &&
2543                                 internals->primary_port == port_id)
2544                         bond_ethdev_primary_set(internals, port_id);
2545         } else {
2546                 if (active_pos == internals->active_slave_count)
2547                         return rc;
2548
2549                 /* Remove from active slave list */
2550                 deactivate_slave(bonded_eth_dev, port_id);
2551
2552                 if (internals->active_slave_count < 1)
2553                         lsc_flag = 1;
2554
2555                 /* Update primary id: take the first active slave from the list, or
2556                  * fall back to the configured primary port if none is available */
2557                 if (port_id == internals->current_primary_port) {
2558                         if (internals->active_slave_count > 0)
2559                                 bond_ethdev_primary_set(internals,
2560                                                 internals->active_slaves[0]);
2561                         else
2562                                 internals->current_primary_port = internals->primary_port;
2563                 }
2564         }
2565
2566         /**
2567          * Update bonded device link properties after any change to active
2568          * slaves
2569          */
2570         bond_ethdev_link_update(bonded_eth_dev, 0);
2571
2572         if (lsc_flag) {
2573                 /* Cancel any possible outstanding interrupts if delays are enabled */
2574                 if (internals->link_up_delay_ms > 0 ||
2575                         internals->link_down_delay_ms > 0)
2576                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2577                                         bonded_eth_dev);
2578
2579                 if (bonded_eth_dev->data->dev_link.link_status) {
2580                         if (internals->link_up_delay_ms > 0)
2581                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2582                                                 bond_ethdev_delayed_lsc_propagation,
2583                                                 (void *)bonded_eth_dev);
2584                         else
2585                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2586                                                 RTE_ETH_EVENT_INTR_LSC,
2587                                                 NULL, NULL);
2588
2589                 } else {
2590                         if (internals->link_down_delay_ms > 0)
2591                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2592                                                 bond_ethdev_delayed_lsc_propagation,
2593                                                 (void *)bonded_eth_dev);
2594                         else
2595                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2596                                                 RTE_ETH_EVENT_INTR_LSC,
2597                                                 NULL, NULL);
2598                 }
2599         }
2600         return 0;
2601 }
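
/*
 * Editor's sketch: this callback is attached to each slave with the standard
 * ethdev callback API, passing a pointer to the bonded port id as the
 * argument (the bonding library registers it when a slave is added). A
 * hypothetical manual registration:
 */
#if 0
static int
example_register_lsc(uint16_t slave_port, uint16_t *bond_port_id)
{
        return rte_eth_dev_callback_register(slave_port,
                        RTE_ETH_EVENT_INTR_LSC,
                        bond_ethdev_lsc_event_callback, bond_port_id);
}
#endif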
2602
2603 static int
2604 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2605                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2606 {
2607         unsigned i, j;
2608         int result = 0;
2609         int slave_reta_size;
2610         unsigned reta_count;
2611         struct bond_dev_private *internals = dev->data->dev_private;
2612
2613         if (reta_size != internals->reta_size)
2614                 return -EINVAL;
2615
2616         /* Copy RETA table */
2617         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2618
2619         for (i = 0; i < reta_count; i++) {
2620                 internals->reta_conf[i].mask = reta_conf[i].mask;
2621                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2622                         if ((reta_conf[i].mask >> j) & 0x01)
2623                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2624         }
2625
2626         /* Fill rest of array */
2627         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2628                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2629                                 sizeof(internals->reta_conf[0]) * reta_count);
2630
2631         /* Propagate RETA over slaves */
2632         for (i = 0; i < internals->slave_count; i++) {
2633                 slave_reta_size = internals->slaves[i].reta_size;
2634                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2635                                 &internals->reta_conf[0], slave_reta_size);
2636                 if (result < 0)
2637                         return result;
2638         }
2639
2640         return 0;
2641 }
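
/*
 * Editor's sketch: building a RETA update for the update/query pair above.
 * The 64-entry group layout comes from rte_eth_rss_reta_entry64; the
 * 512-entry table size and queue count are assumptions for the example.
 */
#if 0
static int
example_spread_reta(uint16_t bond_port, uint16_t nb_rx_queues)
{
        /* 512-entry table => 8 groups of RTE_RETA_GROUP_SIZE (64) */
        struct rte_eth_rss_reta_entry64 conf[512 / RTE_RETA_GROUP_SIZE] = {{0}};
        uint16_t i;

        for (i = 0; i < 512; i++) {
                conf[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
                conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
                                i % nb_rx_queues;
        }
        return rte_eth_dev_rss_reta_update(bond_port, conf, 512);
}
#endif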
2642
static int
bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
                struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
{
        int i, j;
        struct bond_dev_private *internals = dev->data->dev_private;

        if (reta_size != internals->reta_size)
                return -EINVAL;

        /* Copy RETA table */
        for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        if ((reta_conf[i].mask >> j) & 0x01)
                                reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];

        return 0;
}

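/*
 * Update the RSS hash configuration. The requested hash functions are first
 * masked against the offloads every slave supports (flow_type_rss_offloads);
 * a caller-supplied key is cached locally before the resulting configuration
 * is applied to each slave port.
 */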
static int
bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf)
{
        int i, result = 0;
        struct bond_dev_private *internals = dev->data->dev_private;
        struct rte_eth_rss_conf bond_rss_conf;

        memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));

        bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;

        if (bond_rss_conf.rss_hf != 0)
                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;

        if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
                        sizeof(internals->rss_key)) {
                /* A zero length selects the 40-byte default key size */
                if (bond_rss_conf.rss_key_len == 0)
                        bond_rss_conf.rss_key_len = 40;
                internals->rss_key_len = bond_rss_conf.rss_key_len;
                memcpy(internals->rss_key, bond_rss_conf.rss_key,
                                internals->rss_key_len);
        }

        for (i = 0; i < internals->slave_count; i++) {
                result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
                                &bond_rss_conf);
                if (result < 0)
                        return result;
        }

        return 0;
}

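/*
 * Report the currently configured RSS hash functions and, if the caller
 * supplied a buffer, the cached RSS key.
 */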
static int
bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
                struct rte_eth_rss_conf *rss_conf)
{
        struct bond_dev_private *internals = dev->data->dev_private;

        rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
        rss_conf->rss_key_len = internals->rss_key_len;
        if (rss_conf->rss_key)
                memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);

        return 0;
}

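/* The ethdev ops exported by every bonded device; bond_alloc() installs this
 * table on the newly reserved ethdev entry. */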
const struct eth_dev_ops default_dev_ops = {
        .dev_start            = bond_ethdev_start,
        .dev_stop             = bond_ethdev_stop,
        .dev_close            = bond_ethdev_close,
        .dev_configure        = bond_ethdev_configure,
        .dev_infos_get        = bond_ethdev_info,
        .vlan_filter_set      = bond_ethdev_vlan_filter_set,
        .rx_queue_setup       = bond_ethdev_rx_queue_setup,
        .tx_queue_setup       = bond_ethdev_tx_queue_setup,
        .rx_queue_release     = bond_ethdev_rx_queue_release,
        .tx_queue_release     = bond_ethdev_tx_queue_release,
        .link_update          = bond_ethdev_link_update,
        .stats_get            = bond_ethdev_stats_get,
        .stats_reset          = bond_ethdev_stats_reset,
        .promiscuous_enable   = bond_ethdev_promiscuous_enable,
        .promiscuous_disable  = bond_ethdev_promiscuous_disable,
        .reta_update          = bond_ethdev_rss_reta_update,
        .reta_query           = bond_ethdev_rss_reta_query,
        .rss_hash_update      = bond_ethdev_rss_hash_update,
        .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
};

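/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry and its
 * private data, install the mode-independent defaults, apply the requested
 * bonding mode and create the VLAN filter bitmap. Returns the new port id on
 * success, -1 on failure.
 */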
static int
bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
{
        const char *name = rte_vdev_device_name(dev);
        uint8_t socket_id = dev->device.numa_node;
        struct bond_dev_private *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
        uint32_t vlan_filter_bmp_size;

        /* now do all data allocation - for the eth_dev structure and the
         * internal (private) data
         */

        /* reserve an ethdev entry */
        eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
                goto err;
        }

        internals = eth_dev->data->dev_private;
        eth_dev->data->nb_rx_queues = (uint16_t)1;
        eth_dev->data->nb_tx_queues = (uint16_t)1;

        eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
                        socket_id);
        if (eth_dev->data->mac_addrs == NULL) {
                RTE_BOND_LOG(ERR, "Unable to allocate mac_addrs");
                goto err;
        }

        eth_dev->dev_ops = &default_dev_ops;
        eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;

        rte_spinlock_init(&internals->lock);

        internals->port_id = eth_dev->data->port_id;
        internals->mode = BONDING_MODE_INVALID;
        internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
        internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
        internals->xmit_hash = xmit_l2_hash;
        internals->user_defined_mac = 0;

        internals->link_status_polling_enabled = 0;

        internals->link_status_polling_interval_ms =
                DEFAULT_POLLING_INTERVAL_10_MS;
        internals->link_down_delay_ms = 0;
        internals->link_up_delay_ms = 0;

        internals->slave_count = 0;
        internals->active_slave_count = 0;
        internals->rx_offload_capa = 0;
        internals->tx_offload_capa = 0;
        internals->candidate_max_rx_pktlen = 0;
        internals->max_rx_pktlen = 0;

        /* Initially allow to choose any offload type */
        internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;

        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));

        /* Set mode 4 default configuration */
        bond_mode_8023ad_setup(eth_dev, NULL);
        if (bond_ethdev_mode_set(eth_dev, mode)) {
                RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
                                 eth_dev->data->port_id, mode);
                goto err;
        }

        vlan_filter_bmp_size =
                rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
        internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
                                                   RTE_CACHE_LINE_SIZE);
        if (internals->vlan_filter_bmpmem == NULL) {
                RTE_BOND_LOG(ERR,
                             "Failed to allocate vlan bitmap for bonded device %u",
                             eth_dev->data->port_id);
                goto err;
        }

        internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
                        internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
        if (internals->vlan_filter_bmp == NULL) {
                RTE_BOND_LOG(ERR,
                             "Failed to init vlan bitmap for bonded device %u",
                             eth_dev->data->port_id);
                rte_free(internals->vlan_filter_bmpmem);
                goto err;
        }

        return eth_dev->data->port_id;

err:
        rte_free(internals);
        if (eth_dev != NULL) {
                rte_free(eth_dev->data->mac_addrs);
                rte_eth_dev_release_port(eth_dev);
        }
        return -1;
}

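/*
 * vdev probe callback. Parses the device arguments (the bonding mode is
 * mandatory, the socket id optional), creates the bonded ethdev through
 * bond_alloc() and, for mode 4, applies the requested aggregator selection
 * policy. Slave ports given on the command line are attached later, from
 * bond_ethdev_configure().
 */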
static int
bond_probe(struct rte_vdev_device *dev)
{
        const char *name;
        struct bond_dev_private *internals;
        struct rte_kvargs *kvlist;
        uint8_t bonding_mode, socket_id;
        int arg_count, port_id;
        uint8_t agg_mode;

        if (!dev)
                return -EINVAL;

        name = rte_vdev_device_name(dev);
        RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);

        kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
                pmd_bond_init_valid_arguments);
        if (kvlist == NULL)
                return -1;

        /* Parse link bonding mode */
        if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
                if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
                                &bond_ethdev_parse_slave_mode_kvarg,
                                &bonding_mode) != 0) {
                        RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
                                        name);
                        goto parse_error;
                }
        } else {
                RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
                                "device %s\n", name);
                goto parse_error;
        }

        /* Parse socket id to create bonding device on */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
        if (arg_count == 1) {
                if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
                                &bond_ethdev_parse_socket_id_kvarg, &socket_id)
                                != 0) {
                        RTE_LOG(ERR, EAL, "Invalid socket id specified for "
                                        "bonded device %s\n", name);
                        goto parse_error;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
                                "bonded device %s\n", name);
                goto parse_error;
        } else {
                socket_id = rte_socket_id();
        }

        dev->device.numa_node = socket_id;

        /* Create link bonding eth device */
        port_id = bond_alloc(dev, bonding_mode);
        if (port_id < 0) {
                RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
                                "socket %u.\n", name, bonding_mode, socket_id);
                goto parse_error;
        }
        internals = rte_eth_devices[port_id].data->dev_private;
        internals->kvlist = kvlist;

        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
                if (rte_kvargs_process(kvlist,
                                PMD_BOND_AGG_MODE_KVARG,
                                &bond_ethdev_parse_slave_agg_mode_kvarg,
                                &agg_mode) != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to parse agg selection mode for bonded device %s\n",
                                        name);
                        goto parse_error;
                }

                if (internals->mode == BONDING_MODE_8023AD)
                        rte_eth_bond_8023ad_agg_selection_set(port_id,
                                        agg_mode);
        } else if (internals->mode == BONDING_MODE_8023AD) {
                /* The aggregator selection policy only applies to mode 4 */
                rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
        }

        RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
                        "socket %u.\n", name, port_id, bonding_mode, socket_id);
        return 0;

parse_error:
        rte_kvargs_free(kvlist);

        return -1;
}

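/*
 * vdev remove callback. Removal is refused with -EBUSY while slaves are
 * still attached; otherwise the device is stopped and closed if it is still
 * running, and all associated memory is released.
 */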
static int
bond_remove(struct rte_vdev_device *dev)
{
        struct rte_eth_dev *eth_dev;
        struct bond_dev_private *internals;
        const char *name;

        if (!dev)
                return -EINVAL;

        name = rte_vdev_device_name(dev);
        RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);

        /* now free all allocated data - the eth_dev structure and the
         * internal (private) data
         */

        /* find an ethdev entry */
        eth_dev = rte_eth_dev_allocated(name);
        if (eth_dev == NULL)
                return -ENODEV;

        RTE_ASSERT(eth_dev->device == &dev->device);

        internals = eth_dev->data->dev_private;
        if (internals->slave_count != 0)
                return -EBUSY;

        if (eth_dev->data->dev_started == 1) {
                bond_ethdev_stop(eth_dev);
                bond_ethdev_close(eth_dev);
        }

        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;

        rte_bitmap_free(internals->vlan_filter_bmp);
        rte_free(internals->vlan_filter_bmpmem);
        rte_free(eth_dev->data->dev_private);
        rte_free(eth_dev->data->mac_addrs);

        rte_eth_dev_release_port(eth_dev);

        return 0;
}

/* This function resolves the slave port ids after all the other physical
 * and virtual devices have been probed */
static int
bond_ethdev_configure(struct rte_eth_dev *dev)
{
        const char *name = dev->device->name;
        struct bond_dev_private *internals = dev->data->dev_private;
        struct rte_kvargs *kvlist = internals->kvlist;
        int arg_count;
        uint16_t port_id = dev - rte_eth_devices;
        uint8_t agg_mode;

        static const uint8_t default_rss_key[40] = {
                0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
                0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
                0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
                0xBE, 0xAC, 0x01, 0xFA
        };

        unsigned i, j;

        /* If RSS is enabled, fill table and key with default values */
        if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
                memcpy(internals->rss_key, default_rss_key,
                                sizeof(default_rss_key));
                /* Record the cached key length so that rss_hash_conf_get()
                 * reports the default key correctly. */
                internals->rss_key_len = sizeof(default_rss_key);

                for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
                        internals->reta_conf[i].mask = ~0LL;
                        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                                internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
                }
        }

        /* set the max_rx_pktlen */
        internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;

        /*
         * if no kvlist, it means that this bonded device has been created
         * through the bonding api.
         */
        if (!kvlist)
                return 0;

        /* Parse MAC address for bonded device */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
        if (arg_count == 1) {
                struct ether_addr bond_mac;

                if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
                                &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
                        RTE_LOG(ERR, EAL, "Invalid mac address for bonded device %s\n",
                                        name);
                        return -1;
                }

                /* Set MAC address */
                if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set mac address on bonded device %s\n",
                                        name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "MAC address can be specified only once for bonded device %s\n",
                                name);
                return -1;
        }

        /* Parse/set balance mode transmit policy */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
        if (arg_count == 1) {
                uint8_t xmit_policy;

                if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
                                &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
                                                0) {
                        RTE_LOG(ERR, EAL,
                                        "Invalid xmit policy specified for bonded device %s\n",
                                        name);
                        return -1;
                }

                /* Set balance mode transmit policy */
                if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set balance xmit policy on bonded device %s\n",
                                        name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "Transmit policy can be specified only once for bonded device"
                                " %s\n", name);
                return -1;
        }

        if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
                if (rte_kvargs_process(kvlist,
                                PMD_BOND_AGG_MODE_KVARG,
                                &bond_ethdev_parse_slave_agg_mode_kvarg,
                                &agg_mode) != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to parse agg selection mode for bonded device %s\n",
                                        name);
                        /* Bail out rather than use the uninitialized agg_mode */
                        return -1;
                }
                if (internals->mode == BONDING_MODE_8023AD)
                        rte_eth_bond_8023ad_agg_selection_set(port_id,
                                        agg_mode);
        }

        /* Parse/add slave ports to bonded device */
        if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
                struct bond_ethdev_slave_ports slave_ports;
                unsigned i;

                memset(&slave_ports, 0, sizeof(slave_ports));

                if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
                                &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to parse slave ports for bonded device %s\n",
                                        name);
                        return -1;
                }

                for (i = 0; i < slave_ports.slave_count; i++) {
                        if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
                                RTE_LOG(ERR, EAL,
                                                "Failed to add port %d as slave to bonded device %s\n",
                                                slave_ports.slaves[i], name);
                        }
                }
        } else {
                RTE_LOG(ERR, EAL, "No slaves specified for bonded device %s\n", name);
                return -1;
        }

        /* Parse/set primary slave port id */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
        if (arg_count == 1) {
                uint16_t primary_slave_port_id;

                if (rte_kvargs_process(kvlist,
                                PMD_BOND_PRIMARY_SLAVE_KVARG,
                                &bond_ethdev_parse_primary_slave_port_id_kvarg,
                                &primary_slave_port_id) < 0) {
                        RTE_LOG(ERR, EAL,
                                        "Invalid primary slave port id specified for bonded device"
                                        " %s\n", name);
                        return -1;
                }

                /* Set the primary slave port */
                if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
                                != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set primary slave port %d on bonded device %s\n",
                                        primary_slave_port_id, name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "Primary slave can be specified only once for bonded device"
                                " %s\n", name);
                return -1;
        }

        /* Parse link status monitor polling interval */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
        if (arg_count == 1) {
                uint32_t lsc_poll_interval_ms;

                if (rte_kvargs_process(kvlist,
                                PMD_BOND_LSC_POLL_PERIOD_KVARG,
                                &bond_ethdev_parse_time_ms_kvarg,
                                &lsc_poll_interval_ms) < 0) {
                        RTE_LOG(ERR, EAL,
                                        "Invalid lsc polling interval value specified for bonded"
                                        " device %s\n", name);
                        return -1;
                }

                if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
                                != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set lsc monitor polling interval (%u ms) on"
                                        " bonded device %s\n", lsc_poll_interval_ms, name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "LSC polling interval can be specified only once for bonded"
                                " device %s\n", name);
                return -1;
        }

        /* Parse link up interrupt propagation delay */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
        if (arg_count == 1) {
                uint32_t link_up_delay_ms;

                if (rte_kvargs_process(kvlist,
                                PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
                                &bond_ethdev_parse_time_ms_kvarg,
                                &link_up_delay_ms) < 0) {
                        RTE_LOG(ERR, EAL,
                                        "Invalid link up propagation delay value specified for"
                                        " bonded device %s\n", name);
                        return -1;
                }

                /* Set link up propagation delay */
                if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
                                != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set link up propagation delay (%u ms) on bonded"
                                        " device %s\n", link_up_delay_ms, name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "Link up propagation delay can be specified only once for"
                                " bonded device %s\n", name);
                return -1;
        }

        /* Parse link down interrupt propagation delay */
        arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
        if (arg_count == 1) {
                uint32_t link_down_delay_ms;

                if (rte_kvargs_process(kvlist,
                                PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
                                &bond_ethdev_parse_time_ms_kvarg,
                                &link_down_delay_ms) < 0) {
                        RTE_LOG(ERR, EAL,
                                        "Invalid link down propagation delay value specified for"
                                        " bonded device %s\n", name);
                        return -1;
                }

                /* Set link down propagation delay */
                if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
                                != 0) {
                        RTE_LOG(ERR, EAL,
                                        "Failed to set link down propagation delay (%u ms) on"
                                        " bonded device %s\n", link_down_delay_ms, name);
                        return -1;
                }
        } else if (arg_count > 1) {
                RTE_LOG(ERR, EAL,
                                "Link down propagation delay can be specified only once for"
                                " bonded device %s\n", name);
                return -1;
        }

        return 0;
}

struct rte_vdev_driver pmd_bond_drv = {
        .probe = bond_probe,
        .remove = bond_remove,
};

RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);

RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
        "slave=<ifc> "
        "primary=<ifc> "
        "mode=[0-6] "
        "xmit_policy=[l2 | l23 | l34] "
        "agg_mode=[count | stable | bandwidth] "
        "socket_id=<int> "
        "mac=<mac addr> "
        "lsc_poll_period_ms=<int> "
        "up_delay=<int> "
        "down_delay=<int>");
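
/*
 * Illustrative devargs usage (the slave PCI addresses here are hypothetical),
 * e.g. when launching testpmd:
 *
 *   --vdev 'net_bonding0,mode=4,slave=0000:02:00.0,slave=0000:02:00.1,xmit_policy=l34,up_delay=10,down_delay=50'
 *
 * This creates an 802.3ad (mode 4) bonded device over the two slaves, using a
 * layer 3+4 transmit hash and 10/50 ms link up/down propagation delays.
 */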