/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

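/*
 * Skip over any VLAN (0x8100) or QinQ (0x88A8) tags so callers can locate
 * the L3 header; *proto is advanced to the inner EtherType.  For example,
 * a QinQ frame (outer 0x88A8, inner 0x8100) yields an offset of
 * 2 * sizeof(struct vlan_hdr) == 8 bytes.
 */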
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

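/*
 * Default RX burst, used by the round-robin, balance and broadcast modes:
 * poll the active slaves in turn until nb_pkts mbufs have been gathered
 * or every slave has been visited once.  internals->active_slave rotates
 * by one on each call so polling pressure is spread evenly across slaves.
 */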
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port and queue ids */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        /* Guard against a stale index if the active slave count shrank
         * between polls (mirrors the check in the 802.3ad RX path below) */
        if (active_slave >= slave_count)
                active_slave = 0;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave >= slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port and queue ids */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

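/*
 * A frame is a slow-protocol packet if it is untagged and carries
 * EtherType ETHER_TYPE_SLOW (0x8809) with the LACP or Marker subtype.
 * Such frames belong to the 802.3ad state machine rather than to the
 * application.
 */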
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !vlan_tci && (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

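/*
 * Mode 4 (802.3ad) RX burst.  On top of the round-robin polling of the
 * default RX path, the received burst is filtered: slow-protocol frames
 * are handed to bond_mode_8023ad_handle_slow_pkt(), and packets from
 * slaves that are not COLLECTING -- or unicast packets not addressed to
 * the bond while promiscuous mode is off -- are dropped and compacted out
 * of the mbuf array.
 */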
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port and queue ids */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint8_t slaves[RTE_MAX_ETHPORTS];
        uint8_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {
                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow packet,
                         * if the slave is not in collecting state, or if the bonding
                         * interface is not in promiscuous mode and the destination
                         * address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint8_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

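/*
 * Debug-only per-client statistics: a linear scan of the table keyed by
 * (IPv4 address, port).  The TXorRXindicator pointer doubles as a tag --
 * passing &burstnumberRX bumps the RX counter, anything else the TX one.
 */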
static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++)     {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
                        /* Just update the packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* New client: don't overrun the table if it is already full */
        if (active_clients >= MAX_CLIENTS_NUMBER)
                return;

        /* Insert it into the table and increment its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        /* This is an RX queue, so cast to the RX queue structure (only the
         * dev_private field is needed here) */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

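/*
 * Mode 0 (round-robin) TX burst: spread the packets evenly across the
 * active slaves, continuing from where the previous burst left off.  Note
 * that slave_idx below is file-static, so the rotation point is shared by
 * all queues.  Packets a slave fails to send are moved to the tail of
 * bufs for the caller to retry; only the transmitted count is returned.
 */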
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

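/*
 * Transmit hash policies used by the balance and 802.3ad modes.  Each one
 * folds a per-packet hash down and takes it modulo the slave count:
 *   - layer 2:   XOR of source and destination MAC addresses
 *   - layer 2+3: MAC hash XORed with the IPv4/IPv6 address hash
 *   - layer 3+4: IP address hash XORed with the TCP/UDP port hash (the
 *     L4 part is skipped for fragmented IPv4 packets, which carry no
 *     L4 header)
 * All packets of one flow therefore map to one slave, preserving per-flow
 * packet order.
 */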
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

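/*
 * Estimate a slave's spare capacity: link_bwg converts the link speed
 * from Mbit/s to bytes/s and is scaled by the number of elapsed reorder
 * periods; the quotient/remainder of (capacity - observed load) is stored
 * so bandwidth_cmp() can order the slaves from most to least idle.
 */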
static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

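/*
 * Mode 5 (TLB) TX burst: try the slaves in the order computed by the
 * alarm callback above (most spare bandwidth first).  A source MAC equal
 * to the primary slave's address is rewritten to the transmitting slave's
 * own address so peers learn a per-slave MAC.
 */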
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint8_t i, j;

        uint8_t num_of_slaves = internals->active_slave_count;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

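/*
 * Mode 6 (ALB) TX burst.  ARP packets are steered per client by
 * bond_mode_alb_arp_xmit() with the source MAC rewritten to the chosen
 * slave's address; all other traffic falls back to the TLB policy.  When
 * the mode6.ntt flag is set, ARP update packets are additionally
 * allocated from the mode's mempool and sent so clients re-learn their
 * assigned slave.
 */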
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave plus one additional
         * buffer for TLB traffic; in the worst case every packet is sent
         * on a single port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint8_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and forward them to the ALB logic */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
                num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
        }

        return num_tx_total;
}

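/*
 * Mode 2 (balance) TX burst: choose the output slave for each packet with
 * the configured xmit_hash policy, send one sub-burst per slave, and
 * compact any untransmitted packets to the tail of bufs for the caller.
 */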
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

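/*
 * Mode 4 (802.3ad) TX burst.  Slow-protocol frames queued on each slave's
 * tx_ring by the LACP state machine are sent first; data packets are then
 * hashed across the slaves currently in the DISTRIBUTING state.  On a
 * short transmit, unsent slow packets are freed (the state machine will
 * generate new ones) while unsent data packets are moved to the tail of
 * bufs.
 */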
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];
        /* Positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, j, op_slave_idx;
        const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

        /* Allocate extra slots for slow-protocol packets in 802.3ad mode */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
        void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

        /* Total amount of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
        /* Slow packets placed in each slave */
        uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];

                slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
                                slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
                slave_nb_pkts[i] = slave_slow_nb_pkts[i];

                for (j = 0; j < slave_slow_nb_pkts[i]; j++)
                        slave_bufs[i][j] = slow_pkts[j];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate the per-slave mbuf arrays with the packets to be
                 * sent on each slave */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave. Use only
                         * slaves that are currently distributing. */
                        uint8_t slave_offset = distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                /* If tx burst fails, drop the unsent slow packets */
                for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
                        rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

                num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

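/*
 * Mode 3 (broadcast) TX burst: bump each mbuf's reference count by
 * (num_of_slaves - 1) and transmit the same burst on every active slave.
 * Since the caller only knows about one reference per packet, copies that
 * the less successful slaves failed to send are freed here, and the count
 * from the most successful slave is returned.
 */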
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint8_t tx_failed_flag = 0, num_of_slaves;
        uint8_t slaves[RTE_MAX_ETHPORTS];

        uint16_t max_nb_of_tx_pkts = 0;

        int slave_tx_total[RTE_MAX_ETHPORTS];
        int i, most_successful_tx_slave = -1;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return 0;

        /* Increment reference count on mbufs */
        for (i = 0; i < nb_pkts; i++)
                rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

        /* Transmit burst on each active slave */
        for (i = 0; i < num_of_slaves; i++) {
                slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        bufs, nb_pkts);

                if (unlikely(slave_tx_total[i] < nb_pkts))
                        tx_failed_flag = 1;

                /* record the value and slave index for the slave which transmits the
                 * maximum number of packets */
                if (slave_tx_total[i] > max_nb_of_tx_pkts) {
                        max_nb_of_tx_pkts = slave_tx_total[i];
                        most_successful_tx_slave = i;
                }
        }

        /* if slaves fail to transmit packets from burst, the calling application
         * is not expected to know about multiple references to packets so we must
         * handle failures of all packets except those of the most successful slave
         */
        if (unlikely(tx_failed_flag))
                for (i = 0; i < num_of_slaves; i++)
                        if (i != most_successful_tx_slave)
                                while (slave_tx_total[i] < nb_pkts)
                                        rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

        return max_nb_of_tx_pkts;
}

void
link_properties_set(struct rte_eth_dev *bonded_eth_dev,
                struct rte_eth_link *slave_dev_link)
{
        struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        if (slave_dev_link->link_status &&
                bonded_eth_dev->data->dev_started) {
                bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
                bonded_dev_link->link_speed = slave_dev_link->link_speed;

                internals->link_props_set = 1;
        }
}

void
link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;

        memset(&(bonded_eth_dev->data->dev_link), 0,
                        sizeof(bonded_eth_dev->data->dev_link));

        internals->link_props_set = 0;
}

int
link_properties_valid(struct rte_eth_link *bonded_dev_link,
                struct rte_eth_link *slave_dev_link)
{
        if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
                bonded_dev_link->link_speed != slave_dev_link->link_speed)
                return -1;

        return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
                return -1;
        }

        if (dst_mac_addr == NULL) {
                RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        ether_addr_copy(mac_addr, dst_mac_addr);
        return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
        struct ether_addr *mac_addr;

        if (eth_dev == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
                return -1;
        }

        if (new_mac_addr == NULL) {
                RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
                return -1;
        }

        mac_addr = eth_dev->data->mac_addrs;

        /* If the new MAC differs from the current MAC, update it */
        if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
                memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

        return 0;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
        struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
        int i;

        /* Update slave devices MAC addresses */
        if (internals->slave_count < 1)
                return -1;

        switch (internals->mode) {
        case BONDING_MODE_ROUND_ROBIN:
        case BONDING_MODE_BALANCE:
        case BONDING_MODE_BROADCAST:
                for (i = 0; i < internals->slave_count; i++) {
                        if (rte_eth_dev_default_mac_addr_set(
                                        internals->slaves[i].port_id,
                                        bonded_eth_dev->data->mac_addrs)) {
                                RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                internals->slaves[i].port_id);
                                return -1;
                        }
                }
                break;
        case BONDING_MODE_8023AD:
                bond_mode_8023ad_mac_address_update(bonded_eth_dev);
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
        case BONDING_MODE_TLB:
        case BONDING_MODE_ALB:
        default:
                for (i = 0; i < internals->slave_count; i++) {
                        if (internals->slaves[i].port_id ==
                                        internals->current_primary_port) {
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->primary_port,
                                                bonded_eth_dev->data->mac_addrs)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->current_primary_port);
                                        return -1;
                                }
                        } else {
                                if (rte_eth_dev_default_mac_addr_set(
                                                internals->slaves[i].port_id,
                                                &internals->slaves[i].persisted_mac_addr)) {
                                        RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
                                                        internals->slaves[i].port_id);
                                        return -1;
                                }
                        }
                }
        }

        return 0;
}

int
bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
{
        struct bond_dev_private *internals;

        internals = eth_dev->data->dev_private;

        switch (mode) {
        case BONDING_MODE_ROUND_ROBIN:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_ACTIVE_BACKUP:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_BALANCE:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_BROADCAST:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
                break;
        case BONDING_MODE_8023AD:
                if (bond_mode_8023ad_enable(eth_dev) != 0)
                        return -1;

                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
                RTE_LOG(WARNING, PMD,
                                "Using mode 4, it is necessary to do TX burst and RX burst "
                                "at least every 100ms.\n");
                break;
        case BONDING_MODE_TLB:
                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
                break;
        case BONDING_MODE_ALB:
                if (bond_mode_alb_enable(eth_dev) != 0)
                        return -1;

                eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
                eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
                break;
        default:
                return -1;
        }

        internals->mode = mode;

        return 0;
}

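/*
 * Typical control-path usage, as a minimal sketch against the public API
 * in rte_eth_bond.h (the device name and port ids are illustrative):
 *
 *   int port = rte_eth_bond_create("net_bond0", BONDING_MODE_BALANCE, 0);
 *   rte_eth_bond_slave_add(port, slave_port_id);
 *   rte_eth_bond_xmit_policy_set(port, BALANCE_XMIT_POLICY_LAYER23);
 *
 * The bonded port is then configured and started like any other ethdev;
 * bond_ethdev_mode_set() above installs the matching burst handlers.
 */
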
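/*
 * Propagate the bonded device's configuration to a single slave: stop it,
 * enable link-status interrupts when supported, mirror the RSS and VLAN
 * filtering settings, reconfigure it with the bonded device's queue
 * counts, and set up any RX/TX queues the slave does not yet have.
 */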
1333 int
1334 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1335                 struct rte_eth_dev *slave_eth_dev)
1336 {
1337         struct bond_rx_queue *bd_rx_q;
1338         struct bond_tx_queue *bd_tx_q;
1339
1340         uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
1341         uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
1342         int errval;
1343         uint16_t q_id;
1344
1345         struct bond_dev_private *internals = (struct bond_dev_private *)
1346                 bonded_eth_dev->data->dev_private;
1347
1348         /* Stop slave */
1349         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1350
1351         /* Enable interrupts on slave device if supported */
1352         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1353                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1354
1355         /* If RSS is enabled for bonding, try to enable it for slaves */
1356         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1357                 if (internals->rss_key_len != 0) {
1358                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1359                                         internals->rss_key_len;
1360                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1361                                         internals->rss_key;
1362                 } else {
1363                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1364                 }
1365
1366                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1367                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1368                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1369                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1370         }
1371
1372         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1373                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1374
1375         /* Configure device */
1376         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1377                         bonded_eth_dev->data->nb_rx_queues,
1378                         bonded_eth_dev->data->nb_tx_queues,
1379                         &(slave_eth_dev->data->dev_conf));
1380         if (errval != 0) {
1381                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1382                                 slave_eth_dev->data->port_id, errval);
1383                 return errval;
1384         }
1385
1386         /* Setup Rx Queues */
1387         /* Use existing queues, if any */
1388         for (q_id = old_nb_rx_queues;
1389              q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1390                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1391
1392                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1393                                 bd_rx_q->nb_rx_desc,
1394                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1395                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1396                 if (errval != 0) {
1397                         RTE_BOND_LOG(ERR,
1398                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1399                                         slave_eth_dev->data->port_id, q_id, errval);
1400                         return errval;
1401                 }
1402         }
1403
1404         /* Setup Tx Queues */
1405         /* Use existing queues, if any */
1406         for (q_id = old_nb_tx_queues;
1407              q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1408                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1409
1410                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1411                                 bd_tx_q->nb_tx_desc,
1412                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1413                                 &bd_tx_q->tx_conf);
1414                 if (errval != 0) {
1415                         RTE_BOND_LOG(ERR,
1416                                         "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1417                                         slave_eth_dev->data->port_id, q_id, errval);
1418                         return errval;
1419                 }
1420         }
1421
1422         /* Start device */
1423         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1424         if (errval != 0) {
1425                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1426                                 slave_eth_dev->data->port_id, errval);
1427                 return -1;
1428         }
1429
1430         /* If RSS is enabled for bonding, synchronize RETA */
1431         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1432                 int i;
1433                 struct bond_dev_private *internals;
1434
1435                 internals = bonded_eth_dev->data->dev_private;
1436
1437                 for (i = 0; i < internals->slave_count; i++) {
1438                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1439                                 errval = rte_eth_dev_rss_reta_update(
1440                                                 slave_eth_dev->data->port_id,
1441                                                 &internals->reta_conf[0],
1442                                                 internals->slaves[i].reta_size);
1443                                 if (errval != 0) {
1444                                         RTE_LOG(WARNING, PMD,
1445                                                         "rte_eth_dev_rss_reta_update on slave port %d failed (err %d)."
1446                                                         " RSS configuration for bonding may be inconsistent.\n",
1447                                                         slave_eth_dev->data->port_id, errval);
1448                                 }
1449                                 break;
1450                         }
1451                 }
1452         }
1453
1454         /* If lsc interrupt is set, check initial slave's link status */
1455         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1456                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1457                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1458                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1459         }
1460
1461         return 0;
1462 }
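
/*
 * For reference, slave_configure() above mirrors the standard ethdev
 * bring-up an application performs on a plain port; a minimal sketch with
 * assumed queue sizes and an existing mempool mb_pool:
 *
 *   rte_eth_dev_configure(port_id, 1, 1, &dev_conf);
 *   rte_eth_rx_queue_setup(port_id, 0, 128,
 *                   rte_eth_dev_socket_id(port_id), NULL, mb_pool);
 *   rte_eth_tx_queue_setup(port_id, 0, 512,
 *                   rte_eth_dev_socket_id(port_id), NULL);
 *   rte_eth_dev_start(port_id);
 */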
1463
1464 void
1465 slave_remove(struct bond_dev_private *internals,
1466                 struct rte_eth_dev *slave_eth_dev)
1467 {
1468         uint8_t i;
1469
1470         for (i = 0; i < internals->slave_count; i++)
1471                 if (internals->slaves[i].port_id ==
1472                                 slave_eth_dev->data->port_id)
1473                         break;
1474
1475         if (i < (internals->slave_count - 1))
1476                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1477                                 sizeof(internals->slaves[0]) *
1478                                 (internals->slave_count - i - 1));
1479
1480         internals->slave_count--;
1481 }
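
/*
 * Note: slave_remove() compacts the slaves array in place. For example,
 * removing index 1 from ports {A, B, C, D} memmoves {C, D} down one slot,
 * leaving {A, C, D} and slave_count reduced from 4 to 3.
 */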
1482
1483 static void
1484 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1485
1486 void
1487 slave_add(struct bond_dev_private *internals,
1488                 struct rte_eth_dev *slave_eth_dev)
1489 {
1490         struct bond_slave_details *slave_details =
1491                         &internals->slaves[internals->slave_count];
1492
1493         slave_details->port_id = slave_eth_dev->data->port_id;
1494         slave_details->last_link_status = 0;
1495
1496         /* Mark slave devices that don't support interrupts so we can
1497          * compensate when we start the bond
1498          */
1499         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1500                 slave_details->link_status_poll_enabled = 1;
1501         }
1502
1503         slave_details->link_status_wait_to_complete = 0;
1504         /* Save slave's current MAC address so it can be restored on removal */
1505         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1506                         sizeof(struct ether_addr));
1507 }
1508
1509 void
1510 bond_ethdev_primary_set(struct bond_dev_private *internals,
1511                 uint8_t slave_port_id)
1512 {
1513         int i;
1514
1515         if (internals->active_slave_count < 1)
1516                 internals->current_primary_port = slave_port_id;
1517         else
1518                 /* Search bonded device slave ports for new proposed primary port */
1519                 for (i = 0; i < internals->active_slave_count; i++) {
1520                         if (internals->active_slaves[i] == slave_port_id)
1521                                 internals->current_primary_port = slave_port_id;
1522                 }
1523 }
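
/*
 * Sketch (port ids are assumptions): an application normally chooses the
 * primary slave via the public API, which validates the id before calling
 * the helper above:
 *
 *   if (rte_eth_bond_primary_set(bond_port, slave_port) != 0)
 *       RTE_LOG(ERR, USER1, "failed to set primary slave\n");
 */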
1524
1525 static void
1526 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1527
1528 static int
1529 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1530 {
1531         struct bond_dev_private *internals;
1532         int i;
1533
1534         /* slave eth dev will be started by bonded device */
1535         if (check_for_bonded_ethdev(eth_dev)) {
1536                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1537                                 eth_dev->data->port_id);
1538                 return -1;
1539         }
1540
1541         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1542         eth_dev->data->dev_started = 1;
1543
1544         internals = eth_dev->data->dev_private;
1545
1546         if (internals->slave_count == 0) {
1547                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1548                 goto out_err;
1549         }
1550
1551         if (internals->user_defined_mac == 0) {
1552                 struct ether_addr *new_mac_addr = NULL;
1553
1554                 for (i = 0; i < internals->slave_count; i++)
1555                         if (internals->slaves[i].port_id == internals->primary_port)
1556                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1557
1558                 if (new_mac_addr == NULL)
1559                         goto out_err;
1560
1561                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1562                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1563                                         eth_dev->data->port_id);
1564                         goto out_err;
1565                 }
1566         }
1567
1568         /* If bonded device is configured in promiscuous mode then re-apply config */
1569         if (internals->promiscuous_en)
1570                 bond_ethdev_promiscuous_enable(eth_dev);
1571
1572         /* Reconfigure each slave device when starting the bonded device */
1573         for (i = 0; i < internals->slave_count; i++) {
1574                 if (slave_configure(eth_dev,
1575                                 &(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1576                         RTE_BOND_LOG(ERR,
1577                                         "bonded port (%d) failed to reconfigure slave device (%d)",
1578                                         eth_dev->data->port_id, internals->slaves[i].port_id);
1579                         goto out_err;
1580                 }
1581                 /* We will need to poll for link status if any slave doesn't
1582                  * support interrupts
1583                  */
1584                 if (internals->slaves[i].link_status_poll_enabled)
1585                         internals->link_status_polling_enabled = 1;
1586         }
1587
1588         /* start polling if needed */
1589         if (internals->link_status_polling_enabled) {
1590                 rte_eal_alarm_set(
1591                         internals->link_status_polling_interval_ms * 1000,
1592                         bond_ethdev_slave_link_status_change_monitor,
1593                         (void *)&rte_eth_devices[internals->port_id]);
1594         }
1595
1596         /* Update all slave devices' MACs */
1597         if (mac_address_slaves_update(eth_dev) != 0)
1598                 goto out_err;
1599
1600         if (internals->user_defined_primary_port)
1601                 bond_ethdev_primary_set(internals, internals->primary_port);
1602
1603         if (internals->mode == BONDING_MODE_8023AD)
1604                 bond_mode_8023ad_start(eth_dev);
1605
1606         if (internals->mode == BONDING_MODE_TLB ||
1607                         internals->mode == BONDING_MODE_ALB)
1608                 bond_tlb_enable(internals);
1609
1610         return 0;
1611
1612 out_err:
1613         eth_dev->data->dev_started = 0;
1614         return -1;
1615 }
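
/*
 * End-to-end sketch (not part of this driver) of creating and starting a
 * bonded device from an application; slave port ids 0 and 1, queue sizes
 * and the port_conf/mb_pool objects are assumptions, and error handling is
 * elided:
 *
 *   #include <rte_eth_bond.h>
 *
 *   int bond_port = rte_eth_bond_create("net_bonding0",
 *                   BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *   rte_eth_bond_slave_add(bond_port, 0);
 *   rte_eth_bond_slave_add(bond_port, 1);
 *   rte_eth_dev_configure(bond_port, 1, 1, &port_conf);
 *   rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(),
 *                   NULL, mb_pool);
 *   rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(), NULL);
 *   rte_eth_dev_start(bond_port);     // lands in bond_ethdev_start()
 */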
1616
1617 static void
1618 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1619 {
1620         uint8_t i;
1621
1622         if (dev->data->rx_queues != NULL) {
1623                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1624                         rte_free(dev->data->rx_queues[i]);
1625                         dev->data->rx_queues[i] = NULL;
1626                 }
1627                 dev->data->nb_rx_queues = 0;
1628         }
1629
1630         if (dev->data->tx_queues != NULL) {
1631                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1632                         rte_free(dev->data->tx_queues[i]);
1633                         dev->data->tx_queues[i] = NULL;
1634                 }
1635                 dev->data->nb_tx_queues = 0;
1636         }
1637 }
1638
1639 void
1640 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1641 {
1642         struct bond_dev_private *internals = eth_dev->data->dev_private;
1643         uint8_t i;
1644
1645         if (internals->mode == BONDING_MODE_8023AD) {
1646                 struct port *port;
1647                 void *pkt = NULL;
1648
1649                 bond_mode_8023ad_stop(eth_dev);
1650
1651                 /* Discard all messages to/from mode 4 state machines */
1652                 for (i = 0; i < internals->active_slave_count; i++) {
1653                         port = &mode_8023ad_ports[internals->active_slaves[i]];
1654
1655                         RTE_ASSERT(port->rx_ring != NULL);
1656                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1657                                 rte_pktmbuf_free(pkt);
1658
1659                         RTE_ASSERT(port->tx_ring != NULL);
1660                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1661                                 rte_pktmbuf_free(pkt);
1662                 }
1663         }
1664
1665         if (internals->mode == BONDING_MODE_TLB ||
1666                         internals->mode == BONDING_MODE_ALB) {
1667                 bond_tlb_disable(internals);
1668                 for (i = 0; i < internals->active_slave_count; i++)
1669                         tlb_last_obytets[internals->active_slaves[i]] = 0;
1670         }
1671
1672         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1673         eth_dev->data->dev_started = 0;
1674
1675         internals->link_status_polling_enabled = 0;
1676         for (i = 0; i < internals->slave_count; i++) {
1677                 uint16_t slave_id = internals->slaves[i].port_id;
1678                 if (find_slave_by_id(internals->active_slaves,
1679                                 internals->active_slave_count, slave_id) !=
1680                                                 internals->active_slave_count) {
1681                         internals->slaves[i].last_link_status = 0;
1682                         rte_eth_dev_stop(slave_id);
1683                         deactivate_slave(eth_dev, slave_id);
1684                 }
1685         }
1686 }
1687
1688 void
1689 bond_ethdev_close(struct rte_eth_dev *dev)
1690 {
1691         struct bond_dev_private *internals = dev->data->dev_private;
1692
1693         bond_ethdev_free_queues(dev);
1694         rte_bitmap_reset(internals->vlan_filter_bmp);
1695 }
1696
1697 /* forward declaration */
1698 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1699
1700 static void
1701 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1702 {
1703         struct bond_dev_private *internals = dev->data->dev_private;
1704         uint16_t max_nb_rx_queues = UINT16_MAX;
1705         uint16_t max_nb_tx_queues = UINT16_MAX;
1706
1707         dev_info->max_mac_addrs = 1;
1708
1709         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen
1710                                   ? internals->candidate_max_rx_pktlen
1711                                   : ETHER_MAX_JUMBO_FRAME_LEN;
1712
1713         if (internals->slave_count > 0) {
1714                 /* Max number of tx/rx queues that the bonded device can
1715                  * support is the minimum of the values reported by the
1716                  * bonded slaves, as all slaves must be capable of
1717                  * supporting the same number of tx/rx queues.
1718                  */
1719                 struct rte_eth_dev_info slave_info;
1720                 uint8_t idx;
1721
1722                 for (idx = 0; idx < internals->slave_count; idx++) {
1723                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
1724                                         &slave_info);
1725
1726                         if (slave_info.max_rx_queues < max_nb_rx_queues)
1727                                 max_nb_rx_queues = slave_info.max_rx_queues;
1728
1729                         if (slave_info.max_tx_queues < max_nb_tx_queues)
1730                                 max_nb_tx_queues = slave_info.max_tx_queues;
1731                 }
1732         }
1733
1734         dev_info->max_rx_queues = max_nb_rx_queues;
1735         dev_info->max_tx_queues = max_nb_tx_queues;
1736
1737         dev_info->min_rx_bufsize = 0;
1738         dev_info->pci_dev = NULL;
1739
1740         dev_info->rx_offload_capa = internals->rx_offload_capa;
1741         dev_info->tx_offload_capa = internals->tx_offload_capa;
1742         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1743
1744         dev_info->reta_size = internals->reta_size;
1745 }
1746
1747 static int
1748 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1749 {
1750         int res;
1751         uint8_t i;
1752         struct bond_dev_private *internals = dev->data->dev_private;
1753
1754         /* don't do this while a slave is being added */
1755         rte_spinlock_lock(&internals->lock);
1756
1757         if (on)
1758                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
1759         else
1760                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
1761
1762         for (i = 0; i < internals->slave_count; i++) {
1763                 uint8_t port_id = internals->slaves[i].port_id;
1764
1765                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
1766                 if (res == -ENOTSUP)
1767                         RTE_LOG(WARNING, PMD,
1768                                 "Setting VLAN filter on slave port %u not supported.\n",
1769                                 port_id);
1770         }
1771
1772         rte_spinlock_unlock(&internals->lock);
1773         return 0;
1774 }
1775
1776 static int
1777 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1778                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1779                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1780 {
1781         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1782                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1783                                         0, dev->data->numa_node);
1784         if (bd_rx_q == NULL)
1785                 return -1;
1786
1787         bd_rx_q->queue_id = rx_queue_id;
1788         bd_rx_q->dev_private = dev->data->dev_private;
1789
1790         bd_rx_q->nb_rx_desc = nb_rx_desc;
1791
1792         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1793         bd_rx_q->mb_pool = mb_pool;
1794
1795         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1796
1797         return 0;
1798 }
1799
1800 static int
1801 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1802                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1803                 const struct rte_eth_txconf *tx_conf)
1804 {
1805         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1806                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1807                                         0, dev->data->numa_node);
1808
1809         if (bd_tx_q == NULL)
1810                 return -1;
1811
1812         bd_tx_q->queue_id = tx_queue_id;
1813         bd_tx_q->dev_private = dev->data->dev_private;
1814
1815         bd_tx_q->nb_tx_desc = nb_tx_desc;
1816         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1817
1818         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1819
1820         return 0;
1821 }
1822
1823 static void
1824 bond_ethdev_rx_queue_release(void *queue)
1825 {
1826         if (queue == NULL)
1827                 return;
1828
1829         rte_free(queue);
1830 }
1831
1832 static void
1833 bond_ethdev_tx_queue_release(void *queue)
1834 {
1835         if (queue == NULL)
1836                 return;
1837
1838         rte_free(queue);
1839 }
1840
1841 static void
1842 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1843 {
1844         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1845         struct bond_dev_private *internals;
1846
1847         /* Default value for polling slave found is true as we don't want to
1848          * disable the polling thread if we cannot get the lock */
1849         int i, polling_slave_found = 1;
1850
1851         if (cb_arg == NULL)
1852                 return;
1853
1854         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1855         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1856
1857         if (!bonded_ethdev->data->dev_started ||
1858                 !internals->link_status_polling_enabled)
1859                 return;
1860
1861         /* If device is currently being configured then don't check slaves' link
1862          * status, wait until next period */
1863         if (rte_spinlock_trylock(&internals->lock)) {
1864                 if (internals->slave_count > 0)
1865                         polling_slave_found = 0;
1866
1867                 for (i = 0; i < internals->slave_count; i++) {
1868                         if (!internals->slaves[i].link_status_poll_enabled)
1869                                 continue;
1870
1871                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1872                         polling_slave_found = 1;
1873
1874                         /* Update slave link status */
1875                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1876                                         internals->slaves[i].link_status_wait_to_complete);
1877
1878                         /* if link status has changed since last checked then call lsc
1879                          * event callback */
1880                         if (slave_ethdev->data->dev_link.link_status !=
1881                                         internals->slaves[i].last_link_status) {
1882                                 internals->slaves[i].last_link_status =
1883                                                 slave_ethdev->data->dev_link.link_status;
1884
1885                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1886                                                 RTE_ETH_EVENT_INTR_LSC,
1887                                                 &bonded_ethdev->data->port_id);
1888                         }
1889                 }
1890                 rte_spinlock_unlock(&internals->lock);
1891         }
1892
1893         if (polling_slave_found)
1894                 /* Set alarm to continue monitoring link status of slave ethdevs */
1895                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1896                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
1897 }
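
/*
 * The monitor above is a self-rearming EAL alarm: each run re-arms itself
 * while at least one slave still needs polling. A minimal sketch of the
 * same pattern (callback name and period are hypothetical):
 *
 *   static void poll_cb(void *arg) {
 *       // ... periodic work ...
 *       rte_eal_alarm_set(10 * 1000, poll_cb, arg);   // re-arm in 10 ms
 *   }
 *   rte_eal_alarm_set(10 * 1000, poll_cb, my_arg);    // kick off
 */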
1898
1899 static int
1900 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1901                 int wait_to_complete)
1902 {
1903         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1904
1905         if (!bonded_eth_dev->data->dev_started ||
1906                 internals->active_slave_count == 0) {
1907                 bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1908                 return 0;
1909         } else {
1910                 struct rte_eth_dev *slave_eth_dev;
1911                 int i, link_up = 0;
1912
1913                 for (i = 0; i < internals->active_slave_count; i++) {
1914                         slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1915
1916                         (*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1917                                         wait_to_complete);
1918                         if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
1919                                 link_up = 1;
1920                                 break;
1921                         }
1922                 }
1923
1924                 bonded_eth_dev->data->dev_link.link_status = link_up;
1925         }
1926
1927         return 0;
1928 }
1929
1930 static void
1931 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1932 {
1933         struct bond_dev_private *internals = dev->data->dev_private;
1934         struct rte_eth_stats slave_stats;
1935         int i, j;
1936
1937         for (i = 0; i < internals->slave_count; i++) {
1938                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1939
1940                 stats->ipackets += slave_stats.ipackets;
1941                 stats->opackets += slave_stats.opackets;
1942                 stats->ibytes += slave_stats.ibytes;
1943                 stats->obytes += slave_stats.obytes;
1944                 stats->imissed += slave_stats.imissed;
1945                 stats->ierrors += slave_stats.ierrors;
1946                 stats->oerrors += slave_stats.oerrors;
1947                 stats->rx_nombuf += slave_stats.rx_nombuf;
1948
1949                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1950                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1951                         stats->q_opackets[j] += slave_stats.q_opackets[j];
1952                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1953                         stats->q_obytes[j] += slave_stats.q_obytes[j];
1954                         stats->q_errors[j] += slave_stats.q_errors[j];
1955                 }
1956
1957         }
1958 }
1959
1960 static void
1961 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1962 {
1963         struct bond_dev_private *internals = dev->data->dev_private;
1964         int i;
1965
1966         for (i = 0; i < internals->slave_count; i++)
1967                 rte_eth_stats_reset(internals->slaves[i].port_id);
1968 }
1969
1970 static void
1971 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1972 {
1973         struct bond_dev_private *internals = eth_dev->data->dev_private;
1974         int i;
1975
1976         internals->promiscuous_en = 1;
1977
1978         switch (internals->mode) {
1979         /* Promiscuous mode is propagated to all slaves */
1980         case BONDING_MODE_ROUND_ROBIN:
1981         case BONDING_MODE_BALANCE:
1982         case BONDING_MODE_BROADCAST:
1983                 for (i = 0; i < internals->slave_count; i++)
1984                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1985                 break;
1986         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
1987         case BONDING_MODE_8023AD:
1988                 break;
1989         /* Promiscuous mode is propagated only to primary slave */
1990         case BONDING_MODE_ACTIVE_BACKUP:
1991         case BONDING_MODE_TLB:
1992         case BONDING_MODE_ALB:
1993         default:
1994                 rte_eth_promiscuous_enable(internals->current_primary_port);
1995         }
1996 }
1997
1998 static void
1999 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2000 {
2001         struct bond_dev_private *internals = dev->data->dev_private;
2002         int i;
2003
2004         internals->promiscuous_en = 0;
2005
2006         switch (internals->mode) {
2007         /* Promiscuous mode is propagated to all slaves */
2008         case BONDING_MODE_ROUND_ROBIN:
2009         case BONDING_MODE_BALANCE:
2010         case BONDING_MODE_BROADCAST:
2011                 for (i = 0; i < internals->slave_count; i++)
2012                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2013                 break;
2014         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2015         case BONDING_MODE_8023AD:
2016                 break;
2017         /* Promiscuous mode is propagated only to primary slave */
2018         case BONDING_MODE_ACTIVE_BACKUP:
2019         case BONDING_MODE_TLB:
2020         case BONDING_MODE_ALB:
2021         default:
2022                 rte_eth_promiscuous_disable(internals->current_primary_port);
2023         }
2024 }
2025
2026 static void
2027 bond_ethdev_delayed_lsc_propagation(void *arg)
2028 {
2029         if (arg == NULL)
2030                 return;
2031
2032         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2033                         RTE_ETH_EVENT_INTR_LSC, NULL);
2034 }
2035
2036 void
2037 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2038                 void *param)
2039 {
2040         struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
2041         struct bond_dev_private *internals;
2042         struct rte_eth_link link;
2043
2044         int i, valid_slave = 0;
2045         uint8_t active_pos;
2046         uint8_t lsc_flag = 0;
2047
2048         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2049                 return;
2050
2051         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2052         slave_eth_dev = &rte_eth_devices[port_id];
2053
2054         if (check_for_bonded_ethdev(bonded_eth_dev))
2055                 return;
2056
2057         internals = bonded_eth_dev->data->dev_private;
2058
2059         /* If the device isn't started don't handle interrupts */
2060         if (!bonded_eth_dev->data->dev_started)
2061                 return;
2062
2063         /* verify that port_id is a valid slave of bonded port */
2064         for (i = 0; i < internals->slave_count; i++) {
2065                 if (internals->slaves[i].port_id == port_id) {
2066                         valid_slave = 1;
2067                         break;
2068                 }
2069         }
2070
2071         if (!valid_slave)
2072                 return;
2073
2074         /* Search for port in active port list */
2075         active_pos = find_slave_by_id(internals->active_slaves,
2076                         internals->active_slave_count, port_id);
2077
2078         rte_eth_link_get_nowait(port_id, &link);
2079         if (link.link_status) {
2080                 if (active_pos < internals->active_slave_count)
2081                         return;
2082
2083                 /* if no active slave ports then set this port to be primary port */
2084                 if (internals->active_slave_count < 1) {
2085                         /* If first active slave, then change link status */
2086                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2087                         internals->current_primary_port = port_id;
2088                         lsc_flag = 1;
2089
2090                         mac_address_slaves_update(bonded_eth_dev);
2091
2092                         /* Inherit eth dev link properties from first active slave */
2093                         link_properties_set(bonded_eth_dev,
2094                                         &(slave_eth_dev->data->dev_link));
2095                 } else {
2096                         if (link_properties_valid(
2097                                 &bonded_eth_dev->data->dev_link, &link) != 0) {
2098                                 slave_eth_dev->data->dev_flags &=
2099                                         (~RTE_ETH_DEV_BONDED_SLAVE);
2100                                 RTE_LOG(ERR, PMD,
2101                                         "port %u invalid speed/duplex\n",
2102                                         port_id);
2103                                 return;
2104                         }
2105                 }
2106
2107                 activate_slave(bonded_eth_dev, port_id);
2108
2109                 /* If user has defined the primary port then default to using it */
2110                 if (internals->user_defined_primary_port &&
2111                                 internals->primary_port == port_id)
2112                         bond_ethdev_primary_set(internals, port_id);
2113         } else {
2114                 if (active_pos == internals->active_slave_count)
2115                         return;
2116
2117                 /* Remove from active slave list */
2118                 deactivate_slave(bonded_eth_dev, port_id);
2119
2120                 /* No active slaves, change link status to down and reset other
2121                  * link properties */
2122                 if (internals->active_slave_count < 1) {
2123                         lsc_flag = 1;
2124                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2125
2126                         link_properties_reset(bonded_eth_dev);
2127                 }
2128
2129                 /* Update primary id, take first active slave from list or if none
2130                  * available fall back to the configured primary port */
2131                 if (port_id == internals->current_primary_port) {
2132                         if (internals->active_slave_count > 0)
2133                                 bond_ethdev_primary_set(internals,
2134                                                 internals->active_slaves[0]);
2135                         else
2136                                 internals->current_primary_port = internals->primary_port;
2137                 }
2138         }
2139
2140         if (lsc_flag) {
2141                 /* Cancel any possible outstanding interrupts if delays are enabled */
2142                 if (internals->link_up_delay_ms > 0 ||
2143                         internals->link_down_delay_ms > 0)
2144                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2145                                         bonded_eth_dev);
2146
2147                 if (bonded_eth_dev->data->dev_link.link_status) {
2148                         if (internals->link_up_delay_ms > 0)
2149                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2150                                                 bond_ethdev_delayed_lsc_propagation,
2151                                                 (void *)bonded_eth_dev);
2152                         else
2153                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2154                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2155
2156                 } else {
2157                         if (internals->link_down_delay_ms > 0)
2158                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2159                                                 bond_ethdev_delayed_lsc_propagation,
2160                                                 (void *)bonded_eth_dev);
2161                         else
2162                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2163                                                 RTE_ETH_EVENT_INTR_LSC, NULL);
2164                 }
2165         }
2166 }
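
/*
 * Sketch: the delayed propagation path above is driven by the link up/down
 * delays, configurable through the public API (the values are example
 * assumptions, in milliseconds):
 *
 *   rte_eth_bond_link_up_prop_delay_set(bond_port, 500);
 *   rte_eth_bond_link_down_prop_delay_set(bond_port, 1000);
 */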
2167
2168 static int
2169 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2170                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2171 {
2172         unsigned i, j;
2173         int result = 0;
2174         int slave_reta_size;
2175         unsigned reta_count;
2176         struct bond_dev_private *internals = dev->data->dev_private;
2177
2178         if (reta_size != internals->reta_size)
2179                 return -EINVAL;
2180
2181         /* Copy RETA table */
2182         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2183
2184         for (i = 0; i < reta_count; i++) {
2185                 internals->reta_conf[i].mask = reta_conf[i].mask;
2186                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2187                         if ((reta_conf[i].mask >> j) & 0x01)
2188                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2189         }
2190
2191         /* Fill rest of array */
2192         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2193                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2194                                 sizeof(internals->reta_conf[0]) * reta_count);
2195
2196         /* Propagate RETA over slaves */
2197         for (i = 0; i < internals->slave_count; i++) {
2198                 slave_reta_size = internals->slaves[i].reta_size;
2199                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2200                                 &internals->reta_conf[0], slave_reta_size);
2201                 if (result < 0)
2202                         return result;
2203         }
2204
2205         return 0;
2206 }
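
/*
 * Worked example of the RETA fill above: with reta_size = 128 and
 * RTE_RETA_GROUP_SIZE = 64, reta_count = 2, so groups [0..1] hold the user
 * supplied table and groups [2..3], [4..5], ... receive copies, giving
 * slaves with larger redirection tables a consistent repeated pattern.
 */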
2207
2208 static int
2209 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2210                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2211 {
2212         int i, j;
2213         struct bond_dev_private *internals = dev->data->dev_private;
2214
2215         if (reta_size != internals->reta_size)
2216                 return -EINVAL;
2217
2218         /* Copy RETA table */
2219         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2220                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2221                         if ((reta_conf[i].mask >> j) & 0x01)
2222                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2223
2224         return 0;
2225 }
2226
2227 static int
2228 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2229                 struct rte_eth_rss_conf *rss_conf)
2230 {
2231         int i, result = 0;
2232         struct bond_dev_private *internals = dev->data->dev_private;
2233         struct rte_eth_rss_conf bond_rss_conf;
2234
2235         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2236
2237         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2238
2239         if (bond_rss_conf.rss_hf != 0)
2240                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2241
2242         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2243                         sizeof(internals->rss_key)) {
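                /* A key length of 0 selects the default 40-byte RSS key length */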
2244                 if (bond_rss_conf.rss_key_len == 0)
2245                         bond_rss_conf.rss_key_len = 40;
2246                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2247                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2248                                 internals->rss_key_len);
2249         }
2250
2251         for (i = 0; i < internals->slave_count; i++) {
2252                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2253                                 &bond_rss_conf);
2254                 if (result < 0)
2255                         return result;
2256         }
2257
2258         return 0;
2259 }
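
/*
 * Sketch of updating the bonded port's RSS hash from an application (the
 * key contents and hash fields are assumptions for the example):
 *
 *   uint8_t key[40] = { 0 };
 *   struct rte_eth_rss_conf conf = {
 *       .rss_key = key,
 *       .rss_key_len = sizeof(key),
 *       .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *   };
 *   rte_eth_dev_rss_hash_update(bond_port, &conf);
 */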
2260
2261 static int
2262 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2263                 struct rte_eth_rss_conf *rss_conf)
2264 {
2265         struct bond_dev_private *internals = dev->data->dev_private;
2266
2267         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2268         rss_conf->rss_key_len = internals->rss_key_len;
2269         if (rss_conf->rss_key)
2270                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2271
2272         return 0;
2273 }
2274
2275 const struct eth_dev_ops default_dev_ops = {
2276         .dev_start            = bond_ethdev_start,
2277         .dev_stop             = bond_ethdev_stop,
2278         .dev_close            = bond_ethdev_close,
2279         .dev_configure        = bond_ethdev_configure,
2280         .dev_infos_get        = bond_ethdev_info,
2281         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2282         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2283         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2284         .rx_queue_release     = bond_ethdev_rx_queue_release,
2285         .tx_queue_release     = bond_ethdev_tx_queue_release,
2286         .link_update          = bond_ethdev_link_update,
2287         .stats_get            = bond_ethdev_stats_get,
2288         .stats_reset          = bond_ethdev_stats_reset,
2289         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2290         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2291         .reta_update          = bond_ethdev_rss_reta_update,
2292         .reta_query           = bond_ethdev_rss_reta_query,
2293         .rss_hash_update      = bond_ethdev_rss_hash_update,
2294         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2295 };
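
/*
 * These ops back the generic ethdev API for the bonded port; e.g.
 * rte_eth_stats_get(bond_port, &stats) dispatches to
 * bond_ethdev_stats_get() above, which sums the per-slave counters.
 */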
2296
2297 static int
2298 bond_probe(const char *name, const char *params)
2299 {
2300         struct bond_dev_private *internals;
2301         struct rte_kvargs *kvlist;
2302         uint8_t bonding_mode, socket_id;
2303         int  arg_count, port_id;
2304
2305         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2306
2307         kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2308         if (kvlist == NULL)
2309                 return -1;
2310
2311         /* Parse link bonding mode */
2312         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2313                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2314                                 &bond_ethdev_parse_slave_mode_kvarg,
2315                                 &bonding_mode) != 0) {
2316                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2317                                         name);
2318                         goto parse_error;
2319                 }
2320         } else {
2321                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2322                                 "device %s\n", name);
2323                 goto parse_error;
2324         }
2325
2326         /* Parse socket id to create bonding device on */
2327         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2328         if (arg_count == 1) {
2329                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2330                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2331                                 != 0) {
2332                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2333                                         "bonded device %s\n", name);
2334                         goto parse_error;
2335                 }
2336         } else if (arg_count > 1) {
2337                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2338                                 "bonded device %s\n", name);
2339                 goto parse_error;
2340         } else {
2341                 socket_id = rte_socket_id();
2342         }
2343
2344         /* Create link bonding eth device */
2345         port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2346         if (port_id < 0) {
2347                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2348                                 "socket %u.\n", name, bonding_mode, socket_id);
2349                 goto parse_error;
2350         }
2351         internals = rte_eth_devices[port_id].data->dev_private;
2352         internals->kvlist = kvlist;
2353
2354         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2355                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2356         return 0;
2357
2358 parse_error:
2359         rte_kvargs_free(kvlist);
2360
2361         return -1;
2362 }
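
/*
 * bond_probe() is reached when the device is created on the EAL command
 * line; a sketch (PCI addresses are example assumptions):
 *
 *   testpmd -c 0xf -n 4 \
 *       --vdev 'net_bonding0,mode=1,slave=0000:00:08.0,slave=0000:00:09.0'
 */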
2363
2364 static int
2365 bond_remove(const char *name)
2366 {
2367         int  ret;
2368
2369         if (name == NULL)
2370                 return -EINVAL;
2371
2372         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2373
2374         /* free link bonding eth device */
2375         ret = rte_eth_bond_free(name);
2376         if (ret < 0)
2377                 RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2378
2379         return ret;
2380 }
2381
2382 /* This part resolves the slave port ids after all the other pdevs and vdevs
2383  * have been allocated */
2384 static int
2385 bond_ethdev_configure(struct rte_eth_dev *dev)
2386 {
2387         char *name = dev->data->name;
2388         struct bond_dev_private *internals = dev->data->dev_private;
2389         struct rte_kvargs *kvlist = internals->kvlist;
2390         int arg_count;
2391         uint8_t port_id = dev - rte_eth_devices;
2392
2393         static const uint8_t default_rss_key[40] = {
2394                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2395                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2396                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2397                 0xBE, 0xAC, 0x01, 0xFA
2398         };
2399
2400         unsigned i, j;
2401
2402         /*
2403          * If RSS is enabled, fill table with default values and
2404          * set key to the value specified in port RSS configuration.
2405          * Fall back to default RSS key if the key is not specified
2406          */
2407         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2408                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
2409                         internals->rss_key_len =
2410                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
2411                         memcpy(internals->rss_key,
2412                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
2413                                internals->rss_key_len);
2414                 } else {
2415                         internals->rss_key_len = sizeof(default_rss_key);
2416                         memcpy(internals->rss_key, default_rss_key,
2417                                internals->rss_key_len);
2418                 }
2419
2420                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2421                         internals->reta_conf[i].mask = ~0LL;
2422                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2423                                 internals->reta_conf[i].reta[j] =
2424                                                 (i * RTE_RETA_GROUP_SIZE + j) %
2425                                                 dev->data->nb_rx_queues;
2426                 }
2427         }
2428
2429         /* set the max_rx_pktlen */
2430         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2431
2432         /*
2433          * if no kvlist, it means that this bonded device has been created
2434          * through the bonding api.
2435          * through the bonding API.
2436         if (!kvlist)
2437                 return 0;
2438
2439         /* Parse MAC address for bonded device */
2440         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2441         if (arg_count == 1) {
2442                 struct ether_addr bond_mac;
2443
2444                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2445                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2446                         RTE_LOG(INFO, EAL, "Invalid MAC address for bonded device %s\n",
2447                                         name);
2448                         return -1;
2449                 }
2450
2451                 /* Set MAC address */
2452                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2453                         RTE_LOG(ERR, EAL,
2454                                         "Failed to set MAC address on bonded device %s\n",
2455                                         name);
2456                         return -1;
2457                 }
2458         } else if (arg_count > 1) {
2459                 RTE_LOG(ERR, EAL,
2460                                 "MAC address can be specified only once for bonded device %s\n",
2461                                 name);
2462                 return -1;
2463         }
2464
2465         /* Parse/set balance mode transmit policy */
2466         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2467         if (arg_count == 1) {
2468                 uint8_t xmit_policy;
2469
2470                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2471                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2472                                                 0) {
2473                         RTE_LOG(INFO, EAL,
2474                                         "Invalid xmit policy specified for bonded device %s\n",
2475                                         name);
2476                         return -1;
2477                 }
2478
2479                 /* Set balance mode transmit policy */
2480                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2481                         RTE_LOG(ERR, EAL,
2482                                         "Failed to set balance xmit policy on bonded device %s\n",
2483                                         name);
2484                         return -1;
2485                 }
2486         } else if (arg_count > 1) {
2487                 RTE_LOG(ERR, EAL,
2488                                 "Transmit policy can be specified only once for bonded device"
2489                                 " %s\n", name);
2490                 return -1;
2491         }
2492
2493         /* Parse/add slave ports to bonded device */
2494         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2495                 struct bond_ethdev_slave_ports slave_ports;
2496                 unsigned i;
2497
2498                 memset(&slave_ports, 0, sizeof(slave_ports));
2499
2500                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2501                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2502                         RTE_LOG(ERR, EAL,
2503                                         "Failed to parse slave ports for bonded device %s\n",
2504                                         name);
2505                         return -1;
2506                 }
2507
2508                 for (i = 0; i < slave_ports.slave_count; i++) {
2509                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2510                                 RTE_LOG(ERR, EAL,
2511                                                 "Failed to add port %d as slave to bonded device %s\n",
2512                                                 slave_ports.slaves[i], name);
2513                         }
2514                 }
2515
2516         } else {
2517                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2518                 return -1;
2519         }
2520
2521         /* Parse/set primary slave port id */
2522         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2523         if (arg_count == 1) {
2524                 uint8_t primary_slave_port_id;
2525
2526                 if (rte_kvargs_process(kvlist,
2527                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
2528                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
2529                                 &primary_slave_port_id) < 0) {
2530                         RTE_LOG(INFO, EAL,
2531                                         "Invalid primary slave port id specified for bonded device"
2532                                         " %s\n", name);
2533                         return -1;
2534                 }
2535
2536                 /* Set primary slave port id */
2537                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2538                                 != 0) {
2539                         RTE_LOG(ERR, EAL,
2540                                         "Failed to set primary slave port %d on bonded device %s\n",
2541                                         primary_slave_port_id, name);
2542                         return -1;
2543                 }
2544         } else if (arg_count > 1) {
2545                 RTE_LOG(INFO, EAL,
2546                                 "Primary slave can be specified only once for bonded device"
2547                                 " %s\n", name);
2548                 return -1;
2549         }
2550
2551         /* Parse link status monitor polling interval */
2552         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2553         if (arg_count == 1) {
2554                 uint32_t lsc_poll_interval_ms;
2555
2556                 if (rte_kvargs_process(kvlist,
2557                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
2558                                 &bond_ethdev_parse_time_ms_kvarg,
2559                                 &lsc_poll_interval_ms) < 0) {
2560                         RTE_LOG(INFO, EAL,
2561                                         "Invalid lsc polling interval value specified for bonded"
2562                                         " device %s\n", name);
2563                         return -1;
2564                 }
2565
2566                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2567                                 != 0) {
2568                         RTE_LOG(ERR, EAL,
2569                                         "Failed to set lsc monitor polling interval (%u ms) on"
2570                                         " bonded device %s\n", lsc_poll_interval_ms, name);
2571                         return -1;
2572                 }
2573         } else if (arg_count > 1) {
2574                 RTE_LOG(INFO, EAL,
2575                                 "LSC polling interval can be specified only once for bonded"
2576                                 " device %s\n", name);
2577                 return -1;
2578         }
2579
2580         /* Parse link up interrupt propagation delay */
2581         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2582         if (arg_count == 1) {
2583                 uint32_t link_up_delay_ms;
2584
2585                 if (rte_kvargs_process(kvlist,
2586                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2587                                 &bond_ethdev_parse_time_ms_kvarg,
2588                                 &link_up_delay_ms) < 0) {
2589                         RTE_LOG(INFO, EAL,
2590                                         "Invalid link up propagation delay value specified for"
2591                                         " bonded device %s\n", name);
2592                         return -1;
2593                 }
2594
2595                 /* Set link up propagation delay */
2596                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2597                                 != 0) {
2598                         RTE_LOG(ERR, EAL,
2599                                         "Failed to set link up propagation delay (%u ms) on bonded"
2600                                         " device %s\n", link_up_delay_ms, name);
2601                         return -1;
2602                 }
2603         } else if (arg_count > 1) {
2604                 RTE_LOG(INFO, EAL,
2605                                 "Link up propagation delay can be specified only once for"
2606                                 " bonded device %s\n", name);
2607                 return -1;
2608         }
2609
2610         /* Parse link down interrupt propagation delay */
2611         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2612         if (arg_count == 1) {
2613                 uint32_t link_down_delay_ms;
2614
2615                 if (rte_kvargs_process(kvlist,
2616                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2617                                 &bond_ethdev_parse_time_ms_kvarg,
2618                                 &link_down_delay_ms) < 0) {
2619                         RTE_LOG(INFO, EAL,
2620                                         "Invalid link down propagation delay value specified for"
2621                                         " bonded device %s\n", name);
2622                         return -1;
2623                 }
2624
2625                 /* Set link down propagation delay */
2626                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2627                                 != 0) {
2628                         RTE_LOG(ERR, EAL,
2629                                         "Failed to set link down propagation delay (%u ms) on"
2630                                         " bonded device %s\n", link_down_delay_ms, name);
2631                         return -1;
2632                 }
2633         } else if (arg_count > 1) {
2634                 RTE_LOG(INFO, EAL,
2635                                 "Link down propagation delay can be specified only once for"
2636                                 " bonded device %s\n", name);
2637                 return -1;
2638         }
2639
2640         return 0;
2641 }
2642
2643 static struct rte_vdev_driver bond_drv = {
2644         .probe = bond_probe,
2645         .remove = bond_remove,
2646 };
2647
2648 RTE_PMD_REGISTER_VDEV(net_bonding, bond_drv);
2649 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
2650
2651 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
2652         "slave=<ifc> "
2653         "primary=<ifc> "
2654         "mode=[0-6] "
2655         "xmit_policy=[l2 | l23 | l34] "
2656         "socket_id=<int> "
2657         "mac=<mac addr> "
2658         "lsc_poll_period_ms=<int> "
2659         "up_delay=<int> "
2660         "down_delay=<int>");