/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

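/* XOR of the L4 source and destination ports; symmetric, so both directions
 * of a flow hash to the same value. */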
#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

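/*
 * Skip over up to two VLAN tags (single tagged or QinQ) and return the byte
 * offset of the payload relative to the end of the Ethernet header; *proto
 * is updated to the inner EtherType.
 */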
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

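/*
 * Mode 0 (round-robin) RX: poll each active slave in turn, starting from the
 * slave after the one used by the previous burst, until nb_pkts packets have
 * been gathered or every slave has been polled once.
 */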
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave == slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

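/* Mode 1 (active-backup) RX: only the current primary slave is polled. */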
static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

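/*
 * A frame is treated as LACP traffic only if it carries no VLAN tag and its
 * EtherType is the slow protocols type with a marker or LACP subtype.
 */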
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

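/* Match any frame whose EtherType is exactly ETHER_TYPE_SLOW; MAC addresses
 * are left unmasked so they do not participate in the match. */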
static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bond support the flow rule needed to
         * filter slow packets into a dedicated queue */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

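/*
 * Mode 4 RX when dedicated queues are enabled: a flow rule on each slave
 * steers LACP frames to a separate queue, so this path can poll the slaves
 * round-robin without inspecting individual packets.
 */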
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave == slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

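/*
 * Mode 4 TX when dedicated queues are enabled: slow packets never pass
 * through here, so packets are simply hashed across the slaves that are
 * currently in DISTRIBUTING state.
 */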
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];
        /* positions in the slaves array, not port IDs */
        uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
        uint8_t distributing_count;

        uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
        uint16_t i, op_slave_idx;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

        /* Total number of packets in slave_bufs */
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        if (unlikely(nb_pkts == 0))
                return 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
                        num_of_slaves);

        distributing_count = 0;
        for (i = 0; i < num_of_slaves; i++) {
                struct port *port = &mode_8023ad_ports[slaves[i]];
                if (ACTOR_STATE(port, DISTRIBUTING))
                        distributing_offsets[distributing_count++] = i;
        }

        if (likely(distributing_count > 0)) {
                /* Populate each slave's mbuf array with the packets to be sent */
                for (i = 0; i < nb_pkts; i++) {
                        /* Select output slave using hash based on xmit policy */
                        op_slave_idx = internals->xmit_hash(bufs[i],
                                        distributing_count);

                        /* Populate slave mbuf arrays with mbufs for that slave.
                         * Use only slaves that are currently distributing.
                         */
                        uint8_t slave_offset =
                                        distributing_offsets[op_slave_idx];
                        slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
                                        bufs[i];
                        slave_nb_pkts[slave_offset]++;
                }
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] == 0)
                        continue;

                num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                slave_bufs[i], slave_nb_pkts[i]);

                num_tx_total += num_tx_slave;
                num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                        uint16_t j = nb_pkts - num_tx_fail_total;
                        for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
                                        num_tx_slave++)
                                bufs[j] = slave_bufs[i][num_tx_slave];
                }
        }

        return num_tx_total;
}

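/*
 * Mode 4 (802.3ad) RX without dedicated queues: packets are read round-robin
 * from the active slaves; LACP and marker frames are diverted to the mode 4
 * state machine, and data frames are dropped unless the slave is COLLECTING
 * and the destination address is acceptable.
 */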
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_addr bond_mac;

        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint8_t i, j, k;
        uint8_t subtype;

        rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* A packet with a known type above L2 cannot be a slow
                         * protocol frame; skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove the packet from the array if it is a slow packet,
                         * or the slave is not in collecting state, or the bonding
                         * interface is not in promiscuous mode and the destination
                         * address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting || (!promisc &&
                                        !is_multicast_ether_addr(&hdr->d_addr) &&
                                        !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet was consumed by mode 4 or dropped; shift
                                 * the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave == slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
        return;
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

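/*
 * The direction is inferred from which burst counter's address is passed in
 * (TXorRXindicator == &burstnumberRX means RX), so one helper serves both
 * directions.
 */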
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
                        /* Existing client: update its RX or TX packet count */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* We have a new client. Insert it into the table and update its stats */
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
                RTE_LOG(DEBUG, PMD, \
                "%s " \
                "port:%d " \
                "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
                "DstIP:%s " \
                "%s " \
                "%d\n", \
                info, \
                port, \
                eth_h->s_addr.addr_bytes[0], \
                eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], \
                eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], \
                eth_h->s_addr.addr_bytes[5], \
                src_ip, \
                eth_h->d_addr.addr_bytes[0], \
                eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], \
                eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], \
                eth_h->d_addr.addr_bytes[5], \
                dst_ip, \
                arp_op, \
                ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        snprintf(buf, 16, "%s", info);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

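/*
 * Mode 0 (round-robin) TX: deal packets one at a time across the active
 * slaves, continuing from where the previous burst stopped. Note that
 * slave_idx is function-static, so the round-robin position is shared by all
 * queues of all bonded devices in the process.
 */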
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment the current slave index so the next call to tx burst
         * starts on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

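/*
 * Transmit hash policies: layer 2 hashes the MAC addresses only, layer 2+3
 * additionally folds in the IPv4/IPv6 addresses, and layer 3+4 folds in the
 * TCP/UDP ports of non-fragmented packets. The result selects the output
 * slave as hash % slave_count.
 */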
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

        uint32_t hash = ether_hash(eth_hdr);

        return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
        uint32_t hash, l3hash = 0;

        hash = ether_hash(eth_hdr);

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv4_hash(ipv4_hdr);

        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);
        }

        hash = hash ^ l3hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
        struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
        uint16_t proto = eth_hdr->ether_type;
        size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

        struct udp_hdr *udp_hdr = NULL;
        struct tcp_hdr *tcp_hdr = NULL;
        uint32_t hash, l3hash = 0, l4hash = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                size_t ip_hdr_offset;

                l3hash = ipv4_hash(ipv4_hdr);

                /* there is no L4 header in fragmented packet */
                if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
                        ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                        if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                                ip_hdr_offset);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }
        } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                ((char *)(eth_hdr + 1) + vlan_offset);
                l3hash = ipv6_hash(ipv6_hdr);

                if (ipv6_hdr->proto == IPPROTO_TCP) {
                        tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(tcp_hdr);
                } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                        udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                        l4hash = HASH_L4_PORTS(udp_hdr);
                }
        }

        hash = l3hash ^ l4hash;
        hash ^= hash >> 16;
        hash ^= hash >> 8;

        return hash % slave_count;
}

struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

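/*
 * Rough estimate of the bandwidth left unused on a slave over the current
 * measurement window; the quotient and remainder are kept separately so
 * bandwidth_cmp() can order slaves without floating point.
 */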
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

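/*
 * Periodic alarm callback for mode 5 (TLB): sample each active slave's TX
 * byte counter, estimate its spare bandwidth and re-sort tlb_slaves_order so
 * the least loaded slaves are tried first.
 */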
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint8_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint8_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        (struct bond_dev_private *)internals);
}

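/*
 * Mode 5 (TLB) TX: walk the slaves in tlb_slaves_order and, before handing
 * packets to a slave, rewrite source MACs that still carry the primary
 * port's address so the frame leaves with the transmitting slave's MAC.
 */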
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

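/*
 * Mode 6 (ALB) TX: ARP packets are steered by the ALB client table (with
 * their source MAC rewritten to the chosen slave), ARP update packets are
 * generated for known clients when the client table requests it (ntt), and
 * all other traffic falls back to the TLB transmit policy.
 */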
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to send
         * through tlb. In the worst case every packet will be sent on one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they won't
         * be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search the tx buffer for ARP packets and hand them to ALB */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][nb_pkts - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

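/*
 * Mode 2 (balance) TX: each packet is assigned to a slave by the configured
 * transmit hash policy; packets a slave fails to send are compacted to the
 * tail of bufs so the caller can retry them.
 */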
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

        int i, op_slave_id;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate each slave's mbuf array with the packets to be sent on it */
        for (i = 0; i < nb_pkts; i++) {
                /* Select output slave using hash based on xmit policy */
                op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

                /* Populate slave mbuf arrays with mbufs for that slave */
                slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += slave_tx_fail_count;
                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                                &slave_bufs[i][num_tx_slave],
                                                slave_tx_fail_count * sizeof(bufs[0]));
                        }

                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

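/*
 * Mode 4 (802.3ad) TX: slow protocol frames queued by the LACP state machine
 * are drained from each slave's tx_ring and sent ahead of data packets; data
 * packets are hashed only across slaves currently in DISTRIBUTING state.
 */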
1263 static uint16_t
1264 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1265                 uint16_t nb_pkts)
1266 {
1267         struct bond_dev_private *internals;
1268         struct bond_tx_queue *bd_tx_q;
1269
1270         uint16_t num_of_slaves;
1271         uint16_t slaves[RTE_MAX_ETHPORTS];
1272          /* positions in slaves, not ID */
1273         uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
1274         uint8_t distributing_count;
1275
1276         uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
1277         uint16_t i, j, op_slave_idx;
1278         const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
1279
1280         /* Allocate additional packets in case 8023AD mode. */
1281         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
1282         void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
1283
1284         /* Total amount of packets in slave_bufs */
1285         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1286         /* Slow packets placed in each slave */
1287         uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
1288
1289         bd_tx_q = (struct bond_tx_queue *)queue;
1290         internals = bd_tx_q->dev_private;
1291
1292         /* Copy slave list to protect against slave up/down changes during tx
1293          * bursting */
1294         num_of_slaves = internals->active_slave_count;
1295         if (num_of_slaves < 1)
1296                 return num_tx_total;
1297
1298         memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1299
1300         distributing_count = 0;
1301         for (i = 0; i < num_of_slaves; i++) {
1302                 struct port *port = &mode_8023ad_ports[slaves[i]];
1303
1304                 slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1305                                 slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
1306                                 NULL);
1307                 slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1308
1309                 for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1310                         slave_bufs[i][j] = slow_pkts[j];
1311
1312                 if (ACTOR_STATE(port, DISTRIBUTING))
1313                         distributing_offsets[distributing_count++] = i;
1314         }
1315
1316         if (likely(distributing_count > 0)) {
1317                 /* Populate slaves mbuf with the packets which are to be sent on it */
1318                 for (i = 0; i < nb_pkts; i++) {
1319                         /* Select output slave using hash based on xmit policy */
1320                         op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1321
1322                         /* Populate slave mbuf arrays with mbufs for that slave. Use only
1323                          * slaves that are currently distributing. */
1324                         uint8_t slave_offset = distributing_offsets[op_slave_idx];
1325                         slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1326                         slave_nb_pkts[slave_offset]++;
1327                 }
1328         }
1329
1330         /* Send packet burst on each slave device */
1331         for (i = 0; i < num_of_slaves; i++) {
1332                 if (slave_nb_pkts[i] == 0)
1333                         continue;
1334
1335                 num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1336                                 slave_bufs[i], slave_nb_pkts[i]);
1337
1338                 /* If tx burst fails drop slow packets */
1339                 for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1340                         rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1341
1342                 num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1343                 num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1344
1345                 /* If tx burst fails move packets to end of bufs */
1346                 if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1347                         uint16_t j = nb_pkts - num_tx_fail_total;
1348                         for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1349                                 bufs[j] = slave_bufs[i][num_tx_slave];
1350                 }
1351         }
1352
1353         return num_tx_total;
1354 }
1355
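/*
 * Broadcast mode TX. Since the same mbuf is handed to every active slave,
 * each mbuf first receives (num_of_slaves - 1) extra references via
 * rte_mbuf_refcnt_update(); every slave's rte_eth_tx_burst() then consumes
 * one reference per packet, and the mbuf returns to its pool only after the
 * last slave is done with it.
 */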
1356 static uint16_t
1357 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1358                 uint16_t nb_pkts)
1359 {
1360         struct bond_dev_private *internals;
1361         struct bond_tx_queue *bd_tx_q;
1362
1363         uint8_t tx_failed_flag = 0, num_of_slaves;
1364         uint16_t slaves[RTE_MAX_ETHPORTS];
1365
1366         uint16_t max_nb_of_tx_pkts = 0;
1367
1368         int slave_tx_total[RTE_MAX_ETHPORTS];
1369         int i, most_successful_tx_slave = -1;
1370
1371         bd_tx_q = (struct bond_tx_queue *)queue;
1372         internals = bd_tx_q->dev_private;
1373
1374         /* Copy slave list to protect against slave up/down changes during tx
1375          * bursting */
1376         num_of_slaves = internals->active_slave_count;
1377         memcpy(slaves, internals->active_slaves,
1378                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1379
1380         if (num_of_slaves < 1)
1381                 return 0;
1382
1383         /* Increment reference count on mbufs */
1384         for (i = 0; i < nb_pkts; i++)
1385                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1386
1387         /* Transmit burst on each active slave */
1388         for (i = 0; i < num_of_slaves; i++) {
1389                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1390                                         bufs, nb_pkts);
1391
1392                 if (unlikely(slave_tx_total[i] < nb_pkts))
1393                         tx_failed_flag = 1;
1394
1395                 /* record the value and slave index for the slave which transmits the
1396                  * maximum number of packets */
1397                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1398                         max_nb_of_tx_pkts = slave_tx_total[i];
1399                         most_successful_tx_slave = i;
1400                 }
1401         }
1402
1403         /* If slaves fail to transmit packets from the burst, the calling
1404          * application does not know about the extra mbuf references taken above,
1405          * so we must free the unsent packets of every slave except the most
1406          * successful one. */
1407         if (unlikely(tx_failed_flag))
1408                 for (i = 0; i < num_of_slaves; i++)
1409                         if (i != most_successful_tx_slave)
1410                                 while (slave_tx_total[i] < nb_pkts)
1411                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1412
1413         return max_nb_of_tx_pkts;
1414 }
1415
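/*
 * Record the link properties (speed/duplex/autoneg) expected of this bonded
 * device's slaves. In 802.3AD mode the first slave's properties are captured
 * here and later checked by link_properties_valid() against any additional
 * slaves; in all other modes defaults are applied.
 */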
1416 void
1417 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1418 {
1419         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1420
1421         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1422                 /**
1423                  * If in mode 4 then save the link properties of the first
1424                  * slave; all subsequent slaves must match these properties.
1425                  */
1426                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1427
1428                 bond_link->link_autoneg = slave_link->link_autoneg;
1429                 bond_link->link_duplex = slave_link->link_duplex;
1430                 bond_link->link_speed = slave_link->link_speed;
1431         } else {
1432                 /**
1433                  * In any other mode the link properties are set to default
1434                  * values of AUTONEG/DUPLEX
1435                  */
1436                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1437                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1438         }
1439 }
1440
1441 int
1442 link_properties_valid(struct rte_eth_dev *ethdev,
1443                 struct rte_eth_link *slave_link)
1444 {
1445         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1446
1447         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1448                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1449
1450                 if (bond_link->link_duplex != slave_link->link_duplex ||
1451                         bond_link->link_autoneg != slave_link->link_autoneg ||
1452                         bond_link->link_speed != slave_link->link_speed)
1453                         return -1;
1454         }
1455
1456         return 0;
1457 }
1458
1459 int
1460 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1461 {
1462         struct ether_addr *mac_addr;
1463
1464         if (eth_dev == NULL) {
1465                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1466                 return -1;
1467         }
1468
1469         if (dst_mac_addr == NULL) {
1470                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1471                 return -1;
1472         }
1473
1474         mac_addr = eth_dev->data->mac_addrs;
1475
1476         ether_addr_copy(mac_addr, dst_mac_addr);
1477         return 0;
1478 }
1479
1480 int
1481 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1482 {
1483         struct ether_addr *mac_addr;
1484
1485         if (eth_dev == NULL) {
1486                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1487                 return -1;
1488         }
1489
1490         if (new_mac_addr == NULL) {
1491                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1492                 return -1;
1493         }
1494
1495         mac_addr = eth_dev->data->mac_addrs;
1496
1497         /* If the new MAC is different from the current MAC then update */
1498         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1499                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1500
1501         return 0;
1502 }
1503
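/*
 * Push MAC addresses down to the slaves according to the bonding mode: in
 * round robin, balance and broadcast every slave carries the bonded device's
 * MAC; in 802.3AD the mode 4 code owns the addresses; in active backup, TLB
 * and ALB only the current primary takes the bonded MAC while the remaining
 * slaves keep their original (persisted) addresses.
 */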
1504 int
1505 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1506 {
1507         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1508         int i;
1509
1510         /* Update slave devices MAC addresses */
1511         if (internals->slave_count < 1)
1512                 return -1;
1513
1514         switch (internals->mode) {
1515         case BONDING_MODE_ROUND_ROBIN:
1516         case BONDING_MODE_BALANCE:
1517         case BONDING_MODE_BROADCAST:
1518                 for (i = 0; i < internals->slave_count; i++) {
1519                         if (rte_eth_dev_default_mac_addr_set(
1520                                         internals->slaves[i].port_id,
1521                                         bonded_eth_dev->data->mac_addrs)) {
1522                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1523                                                 internals->slaves[i].port_id);
1524                                 return -1;
1525                         }
1526                 }
1527                 break;
1528         case BONDING_MODE_8023AD:
1529                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1530                 break;
1531         case BONDING_MODE_ACTIVE_BACKUP:
1532         case BONDING_MODE_TLB:
1533         case BONDING_MODE_ALB:
1534         default:
1535                 for (i = 0; i < internals->slave_count; i++) {
1536                         if (internals->slaves[i].port_id ==
1537                                         internals->current_primary_port) {
1538                                 if (rte_eth_dev_default_mac_addr_set(
1539                                                 internals->current_primary_port,
1540                                                 bonded_eth_dev->data->mac_addrs)) {
1541                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1542                                                         internals->current_primary_port);
1543                                         return -1;
1544                                 }
1545                         } else {
1546                                 if (rte_eth_dev_default_mac_addr_set(
1547                                                 internals->slaves[i].port_id,
1548                                                 &internals->slaves[i].persisted_mac_addr)) {
1549                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1550                                                         internals->slaves[i].port_id);
1551                                         return -1;
1552                                 }
1553                         }
1554                 }
1555         }
1556
1557         return 0;
1558 }
1559
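/*
 * Hook the mode specific rx/tx burst handlers into the ethdev. Applications
 * normally reach this through the public bonding API rather than calling it
 * directly; a minimal illustrative sketch (port ids are placeholders):
 *
 *     int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_BALANCE,
 *                     rte_socket_id());
 *     rte_eth_bond_slave_add(port, slave_port_id);
 *
 * or equivalently via EAL vdev arguments such as
 *     --vdev 'net_bonding0,mode=2,slave=0000:01:00.0'
 */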
1560 int
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562 {
1563         struct bond_dev_private *internals;
1564
1565         internals = eth_dev->data->dev_private;
1566
1567         switch (mode) {
1568         case BONDING_MODE_ROUND_ROBIN:
1569                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571                 break;
1572         case BONDING_MODE_ACTIVE_BACKUP:
1573                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575                 break;
1576         case BONDING_MODE_BALANCE:
1577                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579                 break;
1580         case BONDING_MODE_BROADCAST:
1581                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583                 break;
1584         case BONDING_MODE_8023AD:
1585                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1586                         return -1;
1587
1588                 if (internals->mode4.dedicated_queues.enabled == 0) {
1589                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591                         RTE_LOG(WARNING, PMD,
1592                                 "Using mode 4; TX and RX bursts must be "
1593                                 "invoked at least every 100ms.\n");
1594                 } else {
1595                         /* Use flow director's optimization */
1596                         eth_dev->rx_pkt_burst =
1597                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1598                         eth_dev->tx_pkt_burst =
1599                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1600                 }
1601                 break;
1602         case BONDING_MODE_TLB:
1603                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605                 break;
1606         case BONDING_MODE_ALB:
1607                 if (bond_mode_alb_enable(eth_dev) != 0)
1608                         return -1;
1609
1610                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612                 break;
1613         default:
1614                 return -1;
1615         }
1616
1617         internals->mode = mode;
1618
1619         return 0;
1620 }
1621
1622
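/*
 * Set up the per-slave resources for LACP control ("slow") traffic: a
 * private mbuf pool is created once per slave, and when dedicated queues are
 * enabled one extra rx queue and one extra tx queue (beyond the data path
 * queues) are configured on the slave to carry the slow frames.
 */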
1623 static int
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625                 struct rte_eth_dev *slave_eth_dev)
1626 {
1627         int errval = 0;
1628         struct bond_dev_private *internals = (struct bond_dev_private *)
1629                 bonded_eth_dev->data->dev_private;
1630         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1631
1632         if (port->slow_pool == NULL) {
1633                 char mem_name[256];
1634                 int slave_id = slave_eth_dev->data->port_id;
1635
1636                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1637                                 slave_id);
1638                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1639                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1640                         slave_eth_dev->data->numa_node);
1641
1642                 /* Any memory allocation failure in initialization is critical because
1643                  * resources can't be freed, so reinitialization is impossible. */
1644                 if (port->slow_pool == NULL) {
1645                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1646                                 slave_id, mem_name, rte_strerror(rte_errno));
1647                 }
1648         }
1649
1650         if (internals->mode4.dedicated_queues.enabled == 1) {
1651                 /* Configure slow Rx queue */
1652
1653                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1654                                 internals->mode4.dedicated_queues.rx_qid, 128,
1655                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1656                                 NULL, port->slow_pool);
1657                 if (errval != 0) {
1658                         RTE_BOND_LOG(ERR,
1659                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1660                                         slave_eth_dev->data->port_id,
1661                                         internals->mode4.dedicated_queues.rx_qid,
1662                                         errval);
1663                         return errval;
1664                 }
1665
1666                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1667                                 internals->mode4.dedicated_queues.tx_qid, 512,
1668                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1669                                 NULL);
1670                 if (errval != 0) {
1671                         RTE_BOND_LOG(ERR,
1672                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1673                                 slave_eth_dev->data->port_id,
1674                                 internals->mode4.dedicated_queues.tx_qid,
1675                                 errval);
1676                         return errval;
1677                 }
1678         }
1679         return 0;
1680 }
1681
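/*
 * (Re)configure a slave to mirror the bonded device's configuration: stop
 * it, propagate the RSS and VLAN settings, set up one rx/tx queue pair per
 * bonded queue (plus the dedicated slow queues in 802.3AD mode), restart it,
 * and finally resynchronize the RSS RETA and the initial link status.
 */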
1682 int
1683 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1684                 struct rte_eth_dev *slave_eth_dev)
1685 {
1686         struct bond_rx_queue *bd_rx_q;
1687         struct bond_tx_queue *bd_tx_q;
1688         uint16_t nb_rx_queues;
1689         uint16_t nb_tx_queues;
1690
1691         int errval;
1692         uint16_t q_id;
1693         struct rte_flow_error flow_error;
1694
1695         struct bond_dev_private *internals = (struct bond_dev_private *)
1696                 bonded_eth_dev->data->dev_private;
1697
1698         /* Stop slave */
1699         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1700
1701         /* Enable interrupts on slave device if supported */
1702         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1703                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1704
1705         /* If RSS is enabled for bonding, try to enable it for slaves  */
1706         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1707                 if (internals->rss_key_len != 0) {
1708                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1709                                         internals->rss_key_len;
1710                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1711                                         internals->rss_key;
1712                 } else {
1713                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1714                 }
1715
1716                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1717                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1718                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1719                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1720         }
1721
1722         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1723                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1724
1725         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1726         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1727
1728         if (internals->mode == BONDING_MODE_8023AD) {
1729                 if (internals->mode4.dedicated_queues.enabled == 1) {
1730                         nb_rx_queues++;
1731                         nb_tx_queues++;
1732                 }
1733         }
1734
1735         /* Configure device */
1736         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1737                         nb_rx_queues, nb_tx_queues,
1738                         &(slave_eth_dev->data->dev_conf));
1739         if (errval != 0) {
1740                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1741                                 slave_eth_dev->data->port_id, errval);
1742                 return errval;
1743         }
1744
1745         /* Setup Rx Queues */
1746         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1747                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1748
1749                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1750                                 bd_rx_q->nb_rx_desc,
1751                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1752                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1753                 if (errval != 0) {
1754                         RTE_BOND_LOG(ERR,
1755                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1756                                         slave_eth_dev->data->port_id, q_id, errval);
1757                         return errval;
1758                 }
1759         }
1760
1761         /* Setup Tx Queues */
1762         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1763                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1764
1765                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1766                                 bd_tx_q->nb_tx_desc,
1767                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768                                 &bd_tx_q->tx_conf);
1769                 if (errval != 0) {
1770                         RTE_BOND_LOG(ERR,
1771                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772                                 slave_eth_dev->data->port_id, q_id, errval);
1773                         return errval;
1774                 }
1775         }
1776
1777         if (internals->mode == BONDING_MODE_8023AD &&
1778                         internals->mode4.dedicated_queues.enabled == 1) {
1779                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1780                 if (errval != 0)
1781                         return errval;
1782
1783                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1784                                 slave_eth_dev->data->port_id) != 0) {
1785                         RTE_BOND_LOG(ERR,
1786                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1787                                 slave_eth_dev->data->port_id);
1788                         return -1;
1789                 }
1790
1791                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1792                         rte_flow_destroy(slave_eth_dev->data->port_id,
1793                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1794                                         &flow_error);
1795
1796                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1797                                 slave_eth_dev->data->port_id);
1798         }
1799
1800         /* Start device */
1801         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1802         if (errval != 0) {
1803                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1804                                 slave_eth_dev->data->port_id, errval);
1805                 return -1;
1806         }
1807
1808         /* If RSS is enabled for bonding, synchronize RETA */
1809         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1810                 int i;
1811                 struct bond_dev_private *internals;
1812
1813                 internals = bonded_eth_dev->data->dev_private;
1814
1815                 for (i = 0; i < internals->slave_count; i++) {
1816                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1817                                 errval = rte_eth_dev_rss_reta_update(
1818                                                 slave_eth_dev->data->port_id,
1819                                                 &internals->reta_conf[0],
1820                                                 internals->slaves[i].reta_size);
1821                                 if (errval != 0) {
1822                                         RTE_LOG(WARNING, PMD,
1823                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1824                                                         " RSS Configuration for bonding may be inconsistent.\n",
1825                                                         slave_eth_dev->data->port_id, errval);
1826                                 }
1827                                 break;
1828                         }
1829                 }
1830         }
1831
1832         /* If lsc interrupt is set, check initial slave's link status */
1833         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1834                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1835                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1836                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1837                         NULL);
1838         }
1839
1840         return 0;
1841 }
1842
1843 void
1844 slave_remove(struct bond_dev_private *internals,
1845                 struct rte_eth_dev *slave_eth_dev)
1846 {
1847         uint8_t i;
1848
1849         for (i = 0; i < internals->slave_count; i++)
1850                 if (internals->slaves[i].port_id ==
1851                                 slave_eth_dev->data->port_id)
1852                         break;
1853
1854         if (i < (internals->slave_count - 1))
1855                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1856                                 sizeof(internals->slaves[0]) *
1857                                 (internals->slave_count - i - 1));
1858
1859         internals->slave_count--;
1860
1861         /* force reconfiguration of slave interfaces */
1862         _rte_eth_dev_reset(slave_eth_dev);
1863 }
1864
1865 static void
1866 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1867
1868 void
1869 slave_add(struct bond_dev_private *internals,
1870                 struct rte_eth_dev *slave_eth_dev)
1871 {
1872         struct bond_slave_details *slave_details =
1873                         &internals->slaves[internals->slave_count];
1874
1875         slave_details->port_id = slave_eth_dev->data->port_id;
1876         slave_details->last_link_status = 0;
1877
1878         /* Mark slave devices that don't support interrupts so we can
1879          * compensate when we start the bond
1880          */
1881         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1882                 slave_details->link_status_poll_enabled = 1;
1883         }
1884
1885         slave_details->link_status_wait_to_complete = 0;
1886         /* Save the slave's original MAC so it can be restored when the slave is removed */
1887         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1888                         sizeof(struct ether_addr));
1889 }
1890
1891 void
1892 bond_ethdev_primary_set(struct bond_dev_private *internals,
1893                 uint16_t slave_port_id)
1894 {
1895         int i;
1896
1897         if (internals->active_slave_count < 1)
1898                 internals->current_primary_port = slave_port_id;
1899         else
1900                 /* Search bonded device slave ports for new proposed primary port */
1901                 for (i = 0; i < internals->active_slave_count; i++) {
1902                         if (internals->active_slaves[i] == slave_port_id)
1903                                 internals->current_primary_port = slave_port_id;
1904                 }
1905 }
1906
1907 static void
1908 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1909
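/*
 * Start the bonded device: adopt the primary slave's MAC unless the user
 * supplied one, reconfigure and start every slave, arm the link status
 * polling alarm for slaves without LSC interrupt support, and start the mode
 * specific machinery (802.3AD state machines, TLB/ALB callbacks).
 */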
1910 static int
1911 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1912 {
1913         struct bond_dev_private *internals;
1914         int i;
1915
1916         /* slave eth dev will be started by bonded device */
1917         if (check_for_bonded_ethdev(eth_dev)) {
1918                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1919                                 eth_dev->data->port_id);
1920                 return -1;
1921         }
1922
1923         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1924         eth_dev->data->dev_started = 1;
1925
1926         internals = eth_dev->data->dev_private;
1927
1928         if (internals->slave_count == 0) {
1929                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1930                 goto out_err;
1931         }
1932
1933         if (internals->user_defined_mac == 0) {
1934                 struct ether_addr *new_mac_addr = NULL;
1935
1936                 for (i = 0; i < internals->slave_count; i++)
1937                         if (internals->slaves[i].port_id == internals->primary_port)
1938                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1939
1940                 if (new_mac_addr == NULL)
1941                         goto out_err;
1942
1943                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1944                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1945                                         eth_dev->data->port_id);
1946                         goto out_err;
1947                 }
1948         }
1949
1950         /* If bonded device is configured in promiscuous mode then re-apply config */
1951         if (internals->promiscuous_en)
1952                 bond_ethdev_promiscuous_enable(eth_dev);
1953
1954         if (internals->mode == BONDING_MODE_8023AD) {
1955                 if (internals->mode4.dedicated_queues.enabled == 1) {
1956                         internals->mode4.dedicated_queues.rx_qid =
1957                                         eth_dev->data->nb_rx_queues;
1958                         internals->mode4.dedicated_queues.tx_qid =
1959                                         eth_dev->data->nb_tx_queues;
1960                 }
1961         }
1962
1963
1964         /* Reconfigure each slave device if starting bonded device */
1965         for (i = 0; i < internals->slave_count; i++) {
1966                 struct rte_eth_dev *slave_ethdev =
1967                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1968                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1969                         RTE_BOND_LOG(ERR,
1970                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1971                                 eth_dev->data->port_id,
1972                                 internals->slaves[i].port_id);
1973                         goto out_err;
1974                 }
1975                 /* We will need to poll for link status if any slave doesn't
1976                  * support interrupts
1977                  */
1978                 if (internals->slaves[i].link_status_poll_enabled)
1979                         internals->link_status_polling_enabled = 1;
1980         }
1981
1982         /* start polling if needed */
1983         if (internals->link_status_polling_enabled) {
1984                 rte_eal_alarm_set(
1985                         internals->link_status_polling_interval_ms * 1000,
1986                         bond_ethdev_slave_link_status_change_monitor,
1987                         (void *)&rte_eth_devices[internals->port_id]);
1988         }
1989
1990         /* Update all slave devices MACs*/
1991         if (mac_address_slaves_update(eth_dev) != 0)
1992                 goto out_err;
1993
1994         if (internals->user_defined_primary_port)
1995                 bond_ethdev_primary_set(internals, internals->primary_port);
1996
1997         if (internals->mode == BONDING_MODE_8023AD)
1998                 bond_mode_8023ad_start(eth_dev);
1999
2000         if (internals->mode == BONDING_MODE_TLB ||
2001                         internals->mode == BONDING_MODE_ALB)
2002                 bond_tlb_enable(internals);
2003
2004         return 0;
2005
2006 out_err:
2007         eth_dev->data->dev_started = 0;
2008         return -1;
2009 }
2010
2011 static void
2012 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2013 {
2014         uint16_t i;
2015
2016         if (dev->data->rx_queues != NULL) {
2017                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2018                         rte_free(dev->data->rx_queues[i]);
2019                         dev->data->rx_queues[i] = NULL;
2020                 }
2021                 dev->data->nb_rx_queues = 0;
2022         }
2023
2024         if (dev->data->tx_queues != NULL) {
2025                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2026                         rte_free(dev->data->tx_queues[i]);
2027                         dev->data->tx_queues[i] = NULL;
2028                 }
2029                 dev->data->nb_tx_queues = 0;
2030         }
2031 }
2032
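/*
 * Stop the bonded device. In 802.3AD mode this also drains the per-slave
 * rx/tx rings used by the mode 4 state machines, freeing any queued control
 * frames so that no stale LACPDUs survive a stop/start cycle.
 */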
2033 void
2034 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2035 {
2036         struct bond_dev_private *internals = eth_dev->data->dev_private;
2037         uint8_t i;
2038
2039         if (internals->mode == BONDING_MODE_8023AD) {
2040                 struct port *port;
2041                 void *pkt = NULL;
2042
2043                 bond_mode_8023ad_stop(eth_dev);
2044
2045                 /* Discard all messages to/from mode 4 state machines */
2046                 for (i = 0; i < internals->active_slave_count; i++) {
2047                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2048
2049                         RTE_ASSERT(port->rx_ring != NULL);
2050                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2051                                 rte_pktmbuf_free(pkt);
2052
2053                         RTE_ASSERT(port->tx_ring != NULL);
2054                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2055                                 rte_pktmbuf_free(pkt);
2056                 }
2057         }
2058
2059         if (internals->mode == BONDING_MODE_TLB ||
2060                         internals->mode == BONDING_MODE_ALB) {
2061                 bond_tlb_disable(internals);
2062                 for (i = 0; i < internals->active_slave_count; i++)
2063                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2064         }
2065
2066         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2067         eth_dev->data->dev_started = 0;
2068
2069         internals->link_status_polling_enabled = 0;
2070         for (i = 0; i < internals->slave_count; i++) {
2071                 uint16_t slave_id = internals->slaves[i].port_id;
2072                 if (find_slave_by_id(internals->active_slaves,
2073                                 internals->active_slave_count, slave_id) !=
2074                                                 internals->active_slave_count) {
2075                         internals->slaves[i].last_link_status = 0;
2076                         rte_eth_dev_stop(slave_id);
2077                         deactivate_slave(eth_dev, slave_id);
2078                 }
2079         }
2080 }
2081
2082 void
2083 bond_ethdev_close(struct rte_eth_dev *dev)
2084 {
2085         struct bond_dev_private *internals = dev->data->dev_private;
2086         uint8_t bond_port_id = internals->port_id;
2087         int skipped = 0;
2088
2089         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2090         while (internals->slave_count != skipped) {
2091                 uint16_t port_id = internals->slaves[skipped].port_id;
2092
2093                 rte_eth_dev_stop(port_id);
2094
2095                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2096                         RTE_LOG(ERR, PMD,
2097                                 "Failed to remove port %d from bonded device "
2098                                 "%s\n", port_id, dev->device->name);
2099                         skipped++;
2100                 }
2101         }
2102         bond_ethdev_free_queues(dev);
2103         rte_bitmap_reset(internals->vlan_filter_bmp);
2104 }
2105
2106 /* forward declaration */
2107 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2108
2109 static void
2110 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2111 {
2112         struct bond_dev_private *internals = dev->data->dev_private;
2113
2114         uint16_t max_nb_rx_queues = UINT16_MAX;
2115         uint16_t max_nb_tx_queues = UINT16_MAX;
2116
2117         dev_info->max_mac_addrs = 1;
2118
2119         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2120                         internals->candidate_max_rx_pktlen :
2121                         ETHER_MAX_JUMBO_FRAME_LEN;
2122
2123         /* Max number of tx/rx queues that the bonded device can support is the
2124          * minimum values of the bonded slaves, as all slaves must be capable
2125          * of supporting the same number of tx/rx queues.
2126          */
2127         if (internals->slave_count > 0) {
2128                 struct rte_eth_dev_info slave_info;
2129                 uint8_t idx;
2130
2131                 for (idx = 0; idx < internals->slave_count; idx++) {
2132                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2133                                         &slave_info);
2134
2135                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2136                                 max_nb_rx_queues = slave_info.max_rx_queues;
2137
2138                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2139                                 max_nb_tx_queues = slave_info.max_tx_queues;
2140                 }
2141         }
2142
2143         dev_info->max_rx_queues = max_nb_rx_queues;
2144         dev_info->max_tx_queues = max_nb_tx_queues;
2145
2146         /**
2147          * If dedicated hw queues enabled for link bonding device in LACP mode
2148          * then we need to reduce the maximum number of data path queues by 1.
2149          */
2150         if (internals->mode == BONDING_MODE_8023AD &&
2151                 internals->mode4.dedicated_queues.enabled == 1) {
2152                 dev_info->max_rx_queues--;
2153                 dev_info->max_tx_queues--;
2154         }
2155
2156         dev_info->min_rx_bufsize = 0;
2157
2158         dev_info->rx_offload_capa = internals->rx_offload_capa;
2159         dev_info->tx_offload_capa = internals->tx_offload_capa;
2160         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2161
2162         dev_info->reta_size = internals->reta_size;
2163 }
2164
2165 static int
2166 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2167 {
2168         int res;
2169         uint16_t i;
2170         struct bond_dev_private *internals = dev->data->dev_private;
2171
2172         /* don't do this while a slave is being added */
2173         rte_spinlock_lock(&internals->lock);
2174
2175         if (on)
2176                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2177         else
2178                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2179
2180         for (i = 0; i < internals->slave_count; i++) {
2181                 uint16_t port_id = internals->slaves[i].port_id;
2182
2183                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2184                 if (res == -ENOTSUP)
2185                         RTE_LOG(WARNING, PMD,
2186                                 "Setting VLAN filter on slave port %u not supported.\n",
2187                                 port_id);
2188         }
2189
2190         rte_spinlock_unlock(&internals->lock);
2191         return 0;
2192 }
2193
2194 static int
2195 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2196                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2197                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2198 {
2199         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2200                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2201                                         0, dev->data->numa_node);
2202         if (bd_rx_q == NULL)
2203                 return -1;
2204
2205         bd_rx_q->queue_id = rx_queue_id;
2206         bd_rx_q->dev_private = dev->data->dev_private;
2207
2208         bd_rx_q->nb_rx_desc = nb_rx_desc;
2209
2210         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2211         bd_rx_q->mb_pool = mb_pool;
2212
2213         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2214
2215         return 0;
2216 }
2217
2218 static int
2219 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2220                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2221                 const struct rte_eth_txconf *tx_conf)
2222 {
2223         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2224                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2225                                         0, dev->data->numa_node);
2226
2227         if (bd_tx_q == NULL)
2228                 return -1;
2229
2230         bd_tx_q->queue_id = tx_queue_id;
2231         bd_tx_q->dev_private = dev->data->dev_private;
2232
2233         bd_tx_q->nb_tx_desc = nb_tx_desc;
2234         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2235
2236         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2237
2238         return 0;
2239 }
2240
2241 static void
2242 bond_ethdev_rx_queue_release(void *queue)
2243 {
2244         if (queue == NULL)
2245                 return;
2246
2247         rte_free(queue);
2248 }
2249
2250 static void
2251 bond_ethdev_tx_queue_release(void *queue)
2252 {
2253         if (queue == NULL)
2254                 return;
2255
2256         rte_free(queue);
2257 }
2258
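/*
 * Periodic link status monitor for slaves without LSC interrupt support. It
 * reschedules itself through rte_eal_alarm_set() every
 * link_status_polling_interval_ms (the alarm API takes microseconds, hence
 * the "* 1000" below) and synthesizes LSC callbacks for any change observed.
 */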
2259 static void
2260 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2261 {
2262         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2263         struct bond_dev_private *internals;
2264
2265         /* Default value for polling slave found is true as we don't want to
2266          * disable the polling thread if we cannot get the lock */
2267         int i, polling_slave_found = 1;
2268
2269         if (cb_arg == NULL)
2270                 return;
2271
2272         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2273         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2274
2275         if (!bonded_ethdev->data->dev_started ||
2276                 !internals->link_status_polling_enabled)
2277                 return;
2278
2279         /* If the device is currently being configured then don't check the
2280          * slaves' link status; wait until the next period */
2281         if (rte_spinlock_trylock(&internals->lock)) {
2282                 if (internals->slave_count > 0)
2283                         polling_slave_found = 0;
2284
2285                 for (i = 0; i < internals->slave_count; i++) {
2286                         if (!internals->slaves[i].link_status_poll_enabled)
2287                                 continue;
2288
2289                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2290                         polling_slave_found = 1;
2291
2292                         /* Update slave link status */
2293                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2294                                         internals->slaves[i].link_status_wait_to_complete);
2295
2296                         /* if link status has changed since last checked then call lsc
2297                          * event callback */
2298                         if (slave_ethdev->data->dev_link.link_status !=
2299                                         internals->slaves[i].last_link_status) {
2300                                 internals->slaves[i].last_link_status =
2301                                                 slave_ethdev->data->dev_link.link_status;
2302
2303                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2304                                                 RTE_ETH_EVENT_INTR_LSC,
2305                                                 &bonded_ethdev->data->port_id,
2306                                                 NULL);
2307                         }
2308                 }
2309                 rte_spinlock_unlock(&internals->lock);
2310         }
2311
2312         if (polling_slave_found)
2313                 /* Set alarm to continue monitoring link status of slave ethdevs */
2314                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2315                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2316 }
2317
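/*
 * Aggregate the bonded device's link state from its active slaves. The
 * reported speed is mode dependent: broadcast reports the minimum slave
 * speed, active backup reports the current primary's speed, and the other
 * modes report the sum of the active slaves' speeds as a theoretical
 * maximum.
 */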
2318 static int
2319 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2320 {
2321         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2322
2323         struct bond_dev_private *bond_ctx;
2324         struct rte_eth_link slave_link;
2325
2326         uint32_t idx;
2327
2328         bond_ctx = ethdev->data->dev_private;
2329
2330         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2331
2332         if (ethdev->data->dev_started == 0 ||
2333                         bond_ctx->active_slave_count == 0) {
2334                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2335                 return 0;
2336         }
2337
2338         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2339
2340         if (wait_to_complete)
2341                 link_update = rte_eth_link_get;
2342         else
2343                 link_update = rte_eth_link_get_nowait;
2344
2345         switch (bond_ctx->mode) {
2346         case BONDING_MODE_BROADCAST:
2347                 /**
2348                  * Set link speed to UINT32_MAX so that the minimum over all
2349                  * active slaves is picked up below.
2350                  */
2351                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2352
2353                 /**
2354                  * Link speed is the minimum of all the slaves' link speeds,
2355                  * as packet loss will occur on a slave if transmission at
2356                  * rates greater than its own speed is attempted.
2357                  */
2358                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2359                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2360
2361                         if (slave_link.link_speed <
2362                                         ethdev->data->dev_link.link_speed)
2363                                 ethdev->data->dev_link.link_speed =
2364                                                 slave_link.link_speed;
2365                 }
2366                 break;
2367         case BONDING_MODE_ACTIVE_BACKUP:
2368                 /* Current primary slave */
2369                 link_update(bond_ctx->current_primary_port, &slave_link);
2370
2371                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2372                 break;
2373         case BONDING_MODE_8023AD:
2374                 ethdev->data->dev_link.link_autoneg =
2375                                 bond_ctx->mode4.slave_link.link_autoneg;
2376                 ethdev->data->dev_link.link_duplex =
2377                                 bond_ctx->mode4.slave_link.link_duplex;
2378                 /* fall through to update link speed */
2379         case BONDING_MODE_ROUND_ROBIN:
2380         case BONDING_MODE_BALANCE:
2381         case BONDING_MODE_TLB:
2382         case BONDING_MODE_ALB:
2383         default:
2384                 /**
2385                  * In these modes the maximum theoretical link speed is the
2386                  * sum of all the slaves' link speeds.
2387                  */
2388                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2389
2390                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2391                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2392
2393                         ethdev->data->dev_link.link_speed +=
2394                                         slave_link.link_speed;
2395                 }
2396         }
2397
2398
2399         return 0;
2400 }
2401
2402
2403 static int
2404 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2405 {
2406         struct bond_dev_private *internals = dev->data->dev_private;
2407         struct rte_eth_stats slave_stats;
2408         int i, j;
2409
2410         for (i = 0; i < internals->slave_count; i++) {
2411                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2412
2413                 stats->ipackets += slave_stats.ipackets;
2414                 stats->opackets += slave_stats.opackets;
2415                 stats->ibytes += slave_stats.ibytes;
2416                 stats->obytes += slave_stats.obytes;
2417                 stats->imissed += slave_stats.imissed;
2418                 stats->ierrors += slave_stats.ierrors;
2419                 stats->oerrors += slave_stats.oerrors;
2420                 stats->rx_nombuf += slave_stats.rx_nombuf;
2421
2422                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2423                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2424                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2425                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2426                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2427                         stats->q_errors[j] += slave_stats.q_errors[j];
2428                 }
2429
2430         }
2431
2432         return 0;
2433 }
2434
2435 static void
2436 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2437 {
2438         struct bond_dev_private *internals = dev->data->dev_private;
2439         int i;
2440
2441         for (i = 0; i < internals->slave_count; i++)
2442                 rte_eth_stats_reset(internals->slaves[i].port_id);
2443 }
2444
2445 static void
2446 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2447 {
2448         struct bond_dev_private *internals = eth_dev->data->dev_private;
2449         int i;
2450
2451         internals->promiscuous_en = 1;
2452
2453         switch (internals->mode) {
2454         /* Promiscuous mode is propagated to all slaves */
2455         case BONDING_MODE_ROUND_ROBIN:
2456         case BONDING_MODE_BALANCE:
2457         case BONDING_MODE_BROADCAST:
2458                 for (i = 0; i < internals->slave_count; i++)
2459                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2460                 break;
2461         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2462         case BONDING_MODE_8023AD:
2463                 break;
2464         /* Promiscuous mode is propagated only to primary slave */
2465         case BONDING_MODE_ACTIVE_BACKUP:
2466         case BONDING_MODE_TLB:
2467         case BONDING_MODE_ALB:
2468         default:
2469                 rte_eth_promiscuous_enable(internals->current_primary_port);
2470         }
2471 }
2472
2473 static void
2474 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2475 {
2476         struct bond_dev_private *internals = dev->data->dev_private;
2477         int i;
2478
2479         internals->promiscuous_en = 0;
2480
2481         switch (internals->mode) {
2482         /* Promiscuous mode is propagated to all slaves */
2483         case BONDING_MODE_ROUND_ROBIN:
2484         case BONDING_MODE_BALANCE:
2485         case BONDING_MODE_BROADCAST:
2486                 for (i = 0; i < internals->slave_count; i++)
2487                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2488                 break;
2489         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2490         case BONDING_MODE_8023AD:
2491                 break;
2492         /* Promiscuous mode is propagated only to primary slave */
2493         case BONDING_MODE_ACTIVE_BACKUP:
2494         case BONDING_MODE_TLB:
2495         case BONDING_MODE_ALB:
2496         default:
2497                 rte_eth_promiscuous_disable(internals->current_primary_port);
2498         }
2499 }
2500
2501 static void
2502 bond_ethdev_delayed_lsc_propagation(void *arg)
2503 {
2504         if (arg == NULL)
2505                 return;
2506
2507         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2508                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2509 }
2510
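/*
 * Link status change handler for slave ports. It activates or deactivates
 * the slave within the bonded device, elects a new primary when required,
 * refreshes the bonded link properties, and, when link up/down delays are
 * configured, defers propagating the event to the application via an EAL
 * alarm.
 */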
2511 int
2512 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2513                 void *param, void *ret_param __rte_unused)
2514 {
2515         struct rte_eth_dev *bonded_eth_dev;
2516         struct bond_dev_private *internals;
2517         struct rte_eth_link link;
2518         int rc = -1;
2519
2520         int i, valid_slave = 0;
2521         uint8_t active_pos;
2522         uint8_t lsc_flag = 0;
2523
2524         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2525                 return rc;
2526
2527         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2528
2529         if (check_for_bonded_ethdev(bonded_eth_dev))
2530                 return rc;
2531
2532         internals = bonded_eth_dev->data->dev_private;
2533
2534         /* If the device isn't started don't handle interrupts */
2535         if (!bonded_eth_dev->data->dev_started)
2536                 return rc;
2537
2538         /* verify that port_id is a valid slave of bonded port */
2539         for (i = 0; i < internals->slave_count; i++) {
2540                 if (internals->slaves[i].port_id == port_id) {
2541                         valid_slave = 1;
2542                         break;
2543                 }
2544         }
2545
2546         if (!valid_slave)
2547                 return rc;
2548
2549         /* Synchronize lsc callback parallel calls either by real link event
2550          * from the slaves PMDs or by the bonding PMD itself.
2551          */
2552         rte_spinlock_lock(&internals->lsc_lock);
2553
2554         /* Search for port in active port list */
2555         active_pos = find_slave_by_id(internals->active_slaves,
2556                         internals->active_slave_count, port_id);
2557
2558         rte_eth_link_get_nowait(port_id, &link);
2559         if (link.link_status) {
2560                 if (active_pos < internals->active_slave_count)
2561                         goto link_update;
2562
2563                 /* if no active slave ports then set this port to be primary port */
2564                 if (internals->active_slave_count < 1) {
2565                         /* If first active slave, then change link status */
2566                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2567                         internals->current_primary_port = port_id;
2568                         lsc_flag = 1;
2569
2570                         mac_address_slaves_update(bonded_eth_dev);
2571                 }
2572
2573                 activate_slave(bonded_eth_dev, port_id);
2574
2575                 /* If user has defined the primary port then default to using it */
2576                 if (internals->user_defined_primary_port &&
2577                                 internals->primary_port == port_id)
2578                         bond_ethdev_primary_set(internals, port_id);
2579         } else {
2580                 if (active_pos == internals->active_slave_count)
2581                         goto link_update;
2582
2583                 /* Remove from active slave list */
2584                 deactivate_slave(bonded_eth_dev, port_id);
2585
2586                 if (internals->active_slave_count < 1)
2587                         lsc_flag = 1;
2588
2589                 /* Update primary id, take first active slave from list or if none
2590                  * available fall back to the user-configured primary port */
2591                 if (port_id == internals->current_primary_port) {
2592                         if (internals->active_slave_count > 0)
2593                                 bond_ethdev_primary_set(internals,
2594                                                 internals->active_slaves[0]);
2595                         else
2596                                 internals->current_primary_port = internals->primary_port;
2597                 }
2598         }
2599
2600 link_update:
2601         /**
2602          * Update bonded device link properties after any change to active
2603          * slaves
2604          */
2605         bond_ethdev_link_update(bonded_eth_dev, 0);
2606
2607         if (lsc_flag) {
2608                 /* Cancel any possible outstanding interrupts if delays are enabled */
2609                 if (internals->link_up_delay_ms > 0 ||
2610                         internals->link_down_delay_ms > 0)
2611                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2612                                         bonded_eth_dev);
2613
2614                 if (bonded_eth_dev->data->dev_link.link_status) {
2615                         if (internals->link_up_delay_ms > 0)
2616                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2617                                                 bond_ethdev_delayed_lsc_propagation,
2618                                                 (void *)bonded_eth_dev);
2619                         else
2620                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2621                                                 RTE_ETH_EVENT_INTR_LSC,
2622                                                 NULL, NULL);
2623
2624                 } else {
2625                         if (internals->link_down_delay_ms > 0)
2626                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2627                                                 bond_ethdev_delayed_lsc_propagation,
2628                                                 (void *)bonded_eth_dev);
2629                         else
2630                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2631                                                 RTE_ETH_EVENT_INTR_LSC,
2632                                                 NULL, NULL);
2633                 }
2634         }
2635
2636         rte_spinlock_unlock(&internals->lsc_lock);
2637
2638         return rc;
2639 }
2640
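/*
 * Update the bonded RETA and propagate it to every slave. The caller's table
 * must match the bonded reta_size exactly; it is then replicated across the
 * internal table so that slaves with a larger RETA still receive a full
 * table. For example (illustrative sizes), a 128-entry update is stored as
 * two RTE_RETA_GROUP_SIZE (64) entry groups and duplicated to fill a
 * 512-entry slave table.
 */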
2641 static int
2642 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2643                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2644 {
2645         unsigned i, j;
2646         int result = 0;
2647         int slave_reta_size;
2648         unsigned reta_count;
2649         struct bond_dev_private *internals = dev->data->dev_private;
2650
2651         if (reta_size != internals->reta_size)
2652                 return -EINVAL;
2653
2654         /* Copy RETA table */
2655         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2656
2657         for (i = 0; i < reta_count; i++) {
2658                 internals->reta_conf[i].mask = reta_conf[i].mask;
2659                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2660                         if ((reta_conf[i].mask >> j) & 0x01)
2661                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2662         }
2663
2664         /* Fill rest of array */
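        /* The first reta_count groups are tiled across the remainder of the
         * array so that a slave whose RETA is larger than the bonded device's
         * still spreads packets identically (e.g. a 128-entry bonded table is
         * copied four times into a 512-entry slave table). */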
2665         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2666                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2667                                 sizeof(internals->reta_conf[0]) * reta_count);
2668
2669         /* Propagate RETA over slaves */
2670         for (i = 0; i < internals->slave_count; i++) {
2671                 slave_reta_size = internals->slaves[i].reta_size;
2672                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2673                                 &internals->reta_conf[0], slave_reta_size);
2674                 if (result < 0)
2675                         return result;
2676         }
2677
2678         return 0;
2679 }
2680
2681 static int
2682 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2683                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2684 {
2685         int i, j;
2686         struct bond_dev_private *internals = dev->data->dev_private;
2687
2688         if (reta_size != internals->reta_size)
2689                 return -EINVAL;
2690
2691         /* Copy RETA table */
2692         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2693                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2694                         if ((reta_conf[i].mask >> j) & 0x01)
2695                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2696
2697         return 0;
2698 }
2699
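/*
 * Update the RSS hash configuration of the bonded device: mask the requested
 * hash functions against what the slaves support, optionally replace the hash
 * key, and push the resulting configuration to every slave.
 */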
2700 static int
2701 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2702                 struct rte_eth_rss_conf *rss_conf)
2703 {
2704         int i, result = 0;
2705         struct bond_dev_private *internals = dev->data->dev_private;
2706         struct rte_eth_rss_conf bond_rss_conf;
2707
2708         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2709
2710         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2711
2712         if (bond_rss_conf.rss_hf != 0)
2713                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2714
2715         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2716                         sizeof(internals->rss_key)) {
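                /* A key length of zero selects the conventional 40-byte RSS
                 * (Toeplitz) key size */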
2717                 if (bond_rss_conf.rss_key_len == 0)
2718                         bond_rss_conf.rss_key_len = 40;
2719                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2720                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2721                                 internals->rss_key_len);
2722         }
2723
2724         for (i = 0; i < internals->slave_count; i++) {
2725                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2726                                 &bond_rss_conf);
2727                 if (result < 0)
2728                         return result;
2729         }
2730
2731         return 0;
2732 }
2733
2734 static int
2735 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2736                 struct rte_eth_rss_conf *rss_conf)
2737 {
2738         struct bond_dev_private *internals = dev->data->dev_private;
2739
2740         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2741         rss_conf->rss_key_len = internals->rss_key_len;
2742         if (rss_conf->rss_key)
2743                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2744
2745         return 0;
2746 }
2747
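/*
 * Generic ethdev ops for the bonded device. The RX/TX burst handlers are not
 * part of this table; they are selected per bonding mode by
 * bond_ethdev_mode_set().
 */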
2748 const struct eth_dev_ops default_dev_ops = {
2749         .dev_start            = bond_ethdev_start,
2750         .dev_stop             = bond_ethdev_stop,
2751         .dev_close            = bond_ethdev_close,
2752         .dev_configure        = bond_ethdev_configure,
2753         .dev_infos_get        = bond_ethdev_info,
2754         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2755         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2756         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2757         .rx_queue_release     = bond_ethdev_rx_queue_release,
2758         .tx_queue_release     = bond_ethdev_tx_queue_release,
2759         .link_update          = bond_ethdev_link_update,
2760         .stats_get            = bond_ethdev_stats_get,
2761         .stats_reset          = bond_ethdev_stats_reset,
2762         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2763         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2764         .reta_update          = bond_ethdev_rss_reta_update,
2765         .reta_query           = bond_ethdev_rss_reta_query,
2766         .rss_hash_update      = bond_ethdev_rss_hash_update,
2767         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2768 };
2769
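/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry, fill the
 * private data with mode-independent defaults, apply the requested bonding
 * mode and create the VLAN filter bitmap. Returns the new port id on success
 * or -1 on failure.
 */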
2770 static int
2771 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2772 {
2773         const char *name = rte_vdev_device_name(dev);
2774         uint8_t socket_id = dev->device.numa_node;
2775         struct bond_dev_private *internals = NULL;
2776         struct rte_eth_dev *eth_dev = NULL;
2777         uint32_t vlan_filter_bmp_size;
2778
2779         /* now do all data allocation - for the eth_dev structure and the
2780          * internal (private) data
2781          */
2782
2783         /* reserve an ethdev entry */
2784         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2785         if (eth_dev == NULL) {
2786                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2787                 goto err;
2788         }
2789
2790         internals = eth_dev->data->dev_private;
2791         eth_dev->data->nb_rx_queues = (uint16_t)1;
2792         eth_dev->data->nb_tx_queues = (uint16_t)1;
2793
2794         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2795                         socket_id);
2796         if (eth_dev->data->mac_addrs == NULL) {
2797                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2798                 goto err;
2799         }
2800
2801         eth_dev->dev_ops = &default_dev_ops;
2802         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2803
2804         rte_spinlock_init(&internals->lock);
2805         rte_spinlock_init(&internals->lsc_lock);
2806
2807         internals->port_id = eth_dev->data->port_id;
2808         internals->mode = BONDING_MODE_INVALID;
2809         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2810         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2811         internals->xmit_hash = xmit_l2_hash;
2812         internals->user_defined_mac = 0;
2813
2814         internals->link_status_polling_enabled = 0;
2815
2816         internals->link_status_polling_interval_ms =
2817                 DEFAULT_POLLING_INTERVAL_10_MS;
2818         internals->link_down_delay_ms = 0;
2819         internals->link_up_delay_ms = 0;
2820
2821         internals->slave_count = 0;
2822         internals->active_slave_count = 0;
2823         internals->rx_offload_capa = 0;
2824         internals->tx_offload_capa = 0;
2825         internals->candidate_max_rx_pktlen = 0;
2826         internals->max_rx_pktlen = 0;
2827
2828         /* Initially allow to choose any offload type */
2829         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2830
2831         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2832         memset(internals->slaves, 0, sizeof(internals->slaves));
2833
2834         /* Set mode 4 default configuration */
2835         bond_mode_8023ad_setup(eth_dev, NULL);
2836         if (bond_ethdev_mode_set(eth_dev, mode)) {
2837                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d to mode %d",
2838                                  eth_dev->data->port_id, mode);
2839                 goto err;
2840         }
2841
2842         vlan_filter_bmp_size =
2843                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2844         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2845                                                    RTE_CACHE_LINE_SIZE);
2846         if (internals->vlan_filter_bmpmem == NULL) {
2847                 RTE_BOND_LOG(ERR,
2848                              "Failed to allocate vlan bitmap for bonded device %u\n",
2849                              eth_dev->data->port_id);
2850                 goto err;
2851         }
2852
2853         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2854                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2855         if (internals->vlan_filter_bmp == NULL) {
2856                 RTE_BOND_LOG(ERR,
2857                              "Failed to init vlan bitmap for bonded device %u\n",
2858                              eth_dev->data->port_id);
2859                 rte_free(internals->vlan_filter_bmpmem);
2860                 goto err;
2861         }
2862
2863         return eth_dev->data->port_id;
2864
2865 err:
2866         rte_free(internals);
2867         if (eth_dev != NULL) {
2868                 rte_free(eth_dev->data->mac_addrs);
2869                 rte_eth_dev_release_port(eth_dev);
2870         }
2871         return -1;
2872 }
2873
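/*
 * vdev probe callback: parse the mandatory "mode" and the optional
 * "socket_id" and "agg_mode" kvargs, then create the bonded device. The
 * remaining kvargs are kept and applied later by bond_ethdev_configure().
 */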
2874 static int
2875 bond_probe(struct rte_vdev_device *dev)
2876 {
2877         const char *name;
2878         struct bond_dev_private *internals;
2879         struct rte_kvargs *kvlist;
2880         uint8_t bonding_mode, socket_id;
2881         int arg_count, port_id;
2882         uint8_t agg_mode;
2883
2884         if (!dev)
2885                 return -EINVAL;
2886
2887         name = rte_vdev_device_name(dev);
2888         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2889
2890         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2891                 pmd_bond_init_valid_arguments);
2892         if (kvlist == NULL)
2893                 return -1;
2894
2895         /* Parse link bonding mode */
2896         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2897                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2898                                 &bond_ethdev_parse_slave_mode_kvarg,
2899                                 &bonding_mode) != 0) {
2900                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2901                                         name);
2902                         goto parse_error;
2903                 }
2904         } else {
2905                 RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
2906                                 "device %s\n", name);
2907                 goto parse_error;
2908         }
2909
2910         /* Parse socket id to create bonding device on */
2911         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2912         if (arg_count == 1) {
2913                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2914                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2915                                 != 0) {
2916                         RTE_LOG(ERR, EAL, "Invalid socket id specified for "
2917                                         "bonded device %s\n", name);
2918                         goto parse_error;
2919                 }
2920         } else if (arg_count > 1) {
2921                 RTE_LOG(ERR, EAL, "Socket id can be specified only once for "
2922                                 "bonded device %s\n", name);
2923                 goto parse_error;
2924         } else {
2925                 socket_id = rte_socket_id();
2926         }
2927
2928         dev->device.numa_node = socket_id;
2929
2930         /* Create link bonding eth device */
2931         port_id = bond_alloc(dev, bonding_mode);
2932         if (port_id < 0) {
2933                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u "
2934                                 "on socket %u.\n", name, bonding_mode, socket_id);
2935                 goto parse_error;
2936         }
2937         internals = rte_eth_devices[port_id].data->dev_private;
2938         internals->kvlist = kvlist;
2939
2941         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
2942                 if (rte_kvargs_process(kvlist,
2943                                 PMD_BOND_AGG_MODE_KVARG,
2944                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
2945                                 &agg_mode) != 0) {
2946                         RTE_LOG(ERR, EAL,
2947                                         "Failed to parse agg selection mode for bonded device %s\n",
2948                                         name);
2949                         goto parse_error;
2950                 }
2951
2952                 if (internals->mode == BONDING_MODE_8023AD) {
2953                         int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
2954                                         agg_mode);
2955                         if (ret < 0) {
2956                                 RTE_BOND_LOG(ERR,
2957                                         "Invalid args for agg selection set "
2958                                         "for bonded device %s", name);
2959                                 return -1;
2960                         }
2961                 }
2962         } else {
2963                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
2964         }
2965
2966         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2967                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2968         return 0;
2969
2970 parse_error:
2971         rte_kvargs_free(kvlist);
2972
2973         return -1;
2974 }
2975
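/*
 * vdev remove callback: refuse to remove a bonded device that still has
 * slaves attached; otherwise stop and close it if it was started, then free
 * all of its resources.
 */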
2976 static int
2977 bond_remove(struct rte_vdev_device *dev)
2978 {
2979         struct rte_eth_dev *eth_dev;
2980         struct bond_dev_private *internals;
2981         const char *name;
2982
2983         if (!dev)
2984                 return -EINVAL;
2985
2986         name = rte_vdev_device_name(dev);
2987         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2988
2989         /* now free all data allocation - for the eth_dev structure
2990          * and the internal (private) data
2991          */
2992
2993         /* find an ethdev entry */
2994         eth_dev = rte_eth_dev_allocated(name);
2995         if (eth_dev == NULL)
2996                 return -ENODEV;
2997
2998         RTE_ASSERT(eth_dev->device == &dev->device);
2999
3000         internals = eth_dev->data->dev_private;
3001         if (internals->slave_count != 0)
3002                 return -EBUSY;
3003
3004         if (eth_dev->data->dev_started == 1) {
3005                 bond_ethdev_stop(eth_dev);
3006                 bond_ethdev_close(eth_dev);
3007         }
3008
3009         eth_dev->dev_ops = NULL;
3010         eth_dev->rx_pkt_burst = NULL;
3011         eth_dev->tx_pkt_burst = NULL;
3012
3013         /* Try to release the mempool used in mode 6. If the bonded
3014          * device is not in mode 6, freeing a NULL pointer is not a
3015          * problem: rte_mempool_free() ignores NULL.
3016          */
3017         rte_mempool_free(internals->mode6.mempool);
3018         rte_bitmap_free(internals->vlan_filter_bmp);
3019         rte_free(internals->vlan_filter_bmpmem);
3020         rte_free(eth_dev->data->dev_private);
3021         rte_free(eth_dev->data->mac_addrs);
3022
3023         rte_eth_dev_release_port(eth_dev);
3024
3025         return 0;
3026 }
3027
3028 /* This function resolves the slave port ids; it runs after all the other
3029  * pdevs and vdevs have been allocated */
3030 static int
3031 bond_ethdev_configure(struct rte_eth_dev *dev)
3032 {
3033         const char *name = dev->device->name;
3034         struct bond_dev_private *internals = dev->data->dev_private;
3035         struct rte_kvargs *kvlist = internals->kvlist;
3036         int arg_count;
3037         uint16_t port_id = dev - rte_eth_devices;
3038         uint8_t agg_mode;
3039
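        /* Default 40-byte RSS hash key, used when the application enables RSS
         * without supplying a key of its own */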
3040         static const uint8_t default_rss_key[40] = {
3041                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3042                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3043                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3044                 0xBE, 0xAC, 0x01, 0xFA
3045         };
3046
3047         unsigned i, j;
3048
3049         /*
3050          * If RSS is enabled, fill table with default values and
3051          * set the key to the value specified in the port RSS configuration.
3052          * Fall back to default RSS key if the key is not specified
3053          */
3054         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3055                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3056                         internals->rss_key_len =
3057                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3058                         memcpy(internals->rss_key,
3059                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3060                                internals->rss_key_len);
3061                 } else {
3062                         internals->rss_key_len = sizeof(default_rss_key);
3063                         memcpy(internals->rss_key, default_rss_key,
3064                                internals->rss_key_len);
3065                 }
3066
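                /* Build the default RETA: spread entries round-robin over the
                 * configured RX queues, so entry n maps to queue
                 * n % nb_rx_queues */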
3067                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3068                         internals->reta_conf[i].mask = ~0LL;
3069                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3070                                 internals->reta_conf[i].reta[j] =
3071                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3072                                                 dev->data->nb_rx_queues;
3073                 }
3074         }
3075
3076         /* set the max_rx_pktlen */
3077         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3078
3079         /*
3080          * if no kvlist, it means that this bonded device has been created
3081          * through the bonding api.
3082          */
3083         if (!kvlist)
3084                 return 0;
3085
3086         /* Parse MAC address for bonded device */
3087         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3088         if (arg_count == 1) {
3089                 struct ether_addr bond_mac;
3090
3091                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3092                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3093                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3094                                         name);
3095                         return -1;
3096                 }
3097
3098                 /* Set MAC address */
3099                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3100                         RTE_LOG(ERR, EAL,
3101                                         "Failed to set mac address on bonded device %s\n",
3102                                         name);
3103                         return -1;
3104                 }
3105         } else if (arg_count > 1) {
3106                 RTE_LOG(ERR, EAL,
3107                                 "MAC address can be specified only once for bonded device %s\n",
3108                                 name);
3109                 return -1;
3110         }
3111
3112         /* Parse/set balance mode transmit policy */
3113         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3114         if (arg_count == 1) {
3115                 uint8_t xmit_policy;
3116
3117                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3118                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3119                                                 0) {
3120                         RTE_LOG(INFO, EAL,
3121                                         "Invalid xmit policy specified for bonded device %s\n",
3122                                         name);
3123                         return -1;
3124                 }
3125
3126                 /* Set balance mode transmit policy */
3127                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3128                         RTE_LOG(ERR, EAL,
3129                                         "Failed to set balance xmit policy on bonded device %s\n",
3130                                         name);
3131                         return -1;
3132                 }
3133         } else if (arg_count > 1) {
3134                 RTE_LOG(ERR, EAL,
3135                                 "Transmit policy can be specified only once for bonded device"
3136                                 " %s\n", name);
3137                 return -1;
3138         }
3139
3140         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3141                 if (rte_kvargs_process(kvlist,
3142                                 PMD_BOND_AGG_MODE_KVARG,
3143                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3144                                 &agg_mode) != 0) {
3145                         RTE_LOG(ERR, EAL,
3146                                         "Failed to parse agg selection mode for bonded device %s\n",
3147                                         name);
3148                         return -1;
3149                 }
3150                 if (internals->mode == BONDING_MODE_8023AD)
3151                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3152                                         agg_mode);
3152         }
3153
3154         /* Parse/add slave ports to bonded device */
3155         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3156                 struct bond_ethdev_slave_ports slave_ports;
3157                 unsigned i;
3158
3159                 memset(&slave_ports, 0, sizeof(slave_ports));
3160
3161                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3162                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3163                         RTE_LOG(ERR, EAL,
3164                                         "Failed to parse slave ports for bonded device %s\n",
3165                                         name);
3166                         return -1;
3167                 }
3168
3169                 for (i = 0; i < slave_ports.slave_count; i++) {
3170                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3171                                 RTE_LOG(ERR, EAL,
3172                                                 "Failed to add port %d as slave to bonded device %s\n",
3173                                                 slave_ports.slaves[i], name);
3174                         }
3175                 }
3176
3177         } else {
3178                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3179                 return -1;
3180         }
3181
3182         /* Parse/set primary slave port id*/
3183         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3184         if (arg_count == 1) {
3185                 uint16_t primary_slave_port_id;
3186
3187                 if (rte_kvargs_process(kvlist,
3188                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3189                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3190                                 &primary_slave_port_id) < 0) {
3191                         RTE_LOG(INFO, EAL,
3192                                         "Invalid primary slave port id specified for bonded device"
3193                                         " %s\n", name);
3194                         return -1;
3195                 }
3196
3197                 /* Set the primary slave port */
3198                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3199                                 != 0) {
3200                         RTE_LOG(ERR, EAL,
3201                                         "Failed to set primary slave port %d on bonded device %s\n",
3202                                         primary_slave_port_id, name);
3203                         return -1;
3204                 }
3205         } else if (arg_count > 1) {
3206                 RTE_LOG(INFO, EAL,
3207                                 "Primary slave can be specified only once for bonded device"
3208                                 " %s\n", name);
3209                 return -1;
3210         }
3211
3212         /* Parse link status monitor polling interval */
3213         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3214         if (arg_count == 1) {
3215                 uint32_t lsc_poll_interval_ms;
3216
3217                 if (rte_kvargs_process(kvlist,
3218                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3219                                 &bond_ethdev_parse_time_ms_kvarg,
3220                                 &lsc_poll_interval_ms) < 0) {
3221                         RTE_LOG(INFO, EAL,
3222                                         "Invalid lsc polling interval value specified for bonded"
3223                                         " device %s\n", name);
3224                         return -1;
3225                 }
3226
3227                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3228                                 != 0) {
3229                         RTE_LOG(ERR, EAL,
3230                                         "Failed to set lsc monitor polling interval (%u ms) on"
3231                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3232                         return -1;
3233                 }
3234         } else if (arg_count > 1) {
3235                 RTE_LOG(INFO, EAL,
3236                                 "LSC polling interval can be specified only once for bonded"
3237                                 " device %s\n", name);
3238                 return -1;
3239         }
3240
3241         /* Parse link up interrupt propagation delay */
3242         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3243         if (arg_count == 1) {
3244                 uint32_t link_up_delay_ms;
3245
3246                 if (rte_kvargs_process(kvlist,
3247                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3248                                 &bond_ethdev_parse_time_ms_kvarg,
3249                                 &link_up_delay_ms) < 0) {
3250                         RTE_LOG(INFO, EAL,
3251                                         "Invalid link up propagation delay value specified for"
3252                                         " bonded device %s\n", name);
3253                         return -1;
3254                 }
3255
3256                 /* Set the link up propagation delay */
3257                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3258                                 != 0) {
3259                         RTE_LOG(ERR, EAL,
3260                                         "Failed to set link up propagation delay (%u ms) on bonded"
3261                                         " device %s\n", link_up_delay_ms, name);
3262                         return -1;
3263                 }
3264         } else if (arg_count > 1) {
3265                 RTE_LOG(INFO, EAL,
3266                                 "Link up propagation delay can be specified only once for"
3267                                 " bonded device %s\n", name);
3268                 return -1;
3269         }
3270
3271         /* Parse link down interrupt propagation delay */
3272         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3273         if (arg_count == 1) {
3274                 uint32_t link_down_delay_ms;
3275
3276                 if (rte_kvargs_process(kvlist,
3277                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3278                                 &bond_ethdev_parse_time_ms_kvarg,
3279                                 &link_down_delay_ms) < 0) {
3280                         RTE_LOG(INFO, EAL,
3281                                         "Invalid link down propagation delay value specified for"
3282                                         " bonded device %s\n", name);
3283                         return -1;
3284                 }
3285
3286                 /* Set the link down propagation delay */
3287                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3288                                 != 0) {
3289                         RTE_LOG(ERR, EAL,
3290                                         "Failed to set link down propagation delay (%u ms) on"
3291                                         " bonded device %s\n", link_down_delay_ms, name);
3292                         return -1;
3293                 }
3294         } else if (arg_count > 1) {
3295                 RTE_LOG(INFO, EAL,
3296                                 "Link down propagation delay can be specified only once for"
3297                                 " bonded device %s\n", name);
3298                 return -1;
3299         }
3300
3301         return 0;
3302 }
3303
3304 struct rte_vdev_driver pmd_bond_drv = {
3305         .probe = bond_probe,
3306         .remove = bond_remove,
3307 };
3308
3309 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3310 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3311
3312 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3313         "slave=<ifc> "
3314         "primary=<ifc> "
3315         "mode=[0-6] "
3316         "xmit_policy=[l2 | l23 | l34] "
3317         "agg_mode=[count | stable | bandwidth] "
3318         "socket_id=<int> "
3319         "mac=<mac addr> "
3320         "lsc_poll_period_ms=<int> "
3321         "up_delay=<int> "
3322         "down_delay=<int>");
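
/*
 * Example usage (a sketch; the PCI addresses, core list and memory channel
 * count below are placeholders, not values required by this driver):
 *
 *   testpmd -l 0-3 -n 4 \
 *       --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,xmit_policy=l34' \
 *       -- -i
 *
 * This creates a balance-mode (mode 2) bonded device with two slaves and an
 * L3/L4 transmit hash policy.
 */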