/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

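/*
 * Return the byte offset introduced by up to two stacked VLAN tags and
 * update *proto to the encapsulated EtherType (both in network byte order).
 */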
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

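/*
 * Basic receive path shared by modes that do not inspect packets: poll
 * each active slave in turn until nb_pkts mbufs have been gathered or
 * every slave has been polled once.
 */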
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_slave = 0;
	uint16_t num_rx_total = 0;

	int i;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
		/* Offset of pointer to *bufs increases as packets are received
		 * from each slave */
		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
		if (num_rx_slave) {
			num_rx_total += num_rx_slave;
			nb_pkts -= num_rx_slave;
		}
	}

	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

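/*
 * Return true for untagged slow protocol frames, i.e. LACP and marker
 * PDUs, which must be consumed by the mode 4 state machine rather than
 * delivered to the application.
 */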
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !vlan_tci && (ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

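/*
 * Validate that a slave port can steer slow protocol frames (EtherType
 * 0x8809) to the dedicated mode 4 control queue, using a flow rule built
 * from the items and attributes defined above.
 */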
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint8_t slave_port) {
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0)
		return -1;

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info, slave_info;
	uint8_t idx;

	/* Verify that every slave in the bonding supports the flow rule used
	 * to steer slow protocol packets to the dedicated queue */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			rte_eth_dev_info_get(internals->slaves[idx].port_id,
					&slave_info);

			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint8_t slave_port) {
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

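/*
 * Mode 4 receive path used when dedicated queues are enabled: slow
 * protocol frames arrive on their own hardware queue, so the data queues
 * can be polled round-robin without inspecting each packet.
 */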
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count;

	uint8_t i, idx;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0, idx = internals->active_slave;
			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
		idx = idx % slave_count;

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);
	}

	internals->active_slave = idx;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, op_slave_idx;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	if (unlikely(nb_pkts == 0))
		return 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) *
			num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];
		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate the slave mbuf arrays with the packets to be sent */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i],
					distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave.
			 * Use only slaves that are currently distributing.
			 */
			uint8_t slave_offset =
					distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] =
					bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		num_tx_total += num_tx_slave;
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++,
					num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

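/*
 * Mode 4 receive path without dedicated queues: every received packet is
 * examined so that LACP/marker PDUs are handed to the mode 4 state
 * machine, and frames from non-collecting slaves (or frames not addressed
 * to the bonded MAC when not in promiscuous mode) are dropped.
 */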
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure containing the bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_addr bond_mac;

	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint8_t slaves[RTE_MAX_ETHPORTS];
	uint8_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t i, j, k;
	uint8_t subtype;

	rte_eth_macaddr_get(internals->port_id, &bond_mac);
	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {
			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove the packet from the array if it is a slow packet, if the
			 * slave is not in collecting state, or if the bonding interface
			 * is not in promiscuous mode and the packet is not addressed to
			 * us. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
				!collecting || (!promisc &&
					!is_multicast_ether_addr(&hdr->d_addr) &&
					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	internals->active_slave = idx;
	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif

#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint8_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update the packet count for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert it into the table and update the stats.
	 * Drop the update if the table is already full. */
	if (active_clients == MAX_CLIENTS_NUMBER)
		return;
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
		RTE_LOG(DEBUG, PMD, \
		"%s " \
		"port:%d " \
		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
		"DstIP:%s " \
		"%s " \
		"%d\n", \
		info, \
		port, \
		eth_h->s_addr.addr_bytes[0], \
		eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], \
		eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], \
		eth_h->s_addr.addr_bytes[5], \
		src_ip, \
		eth_h->d_addr.addr_bytes[0], \
		eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], \
		eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], \
		eth_h->d_addr.addr_bytes[5], \
		dst_ip, \
		arp_op, \
		++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	snprintf(buf, 16, "%s", info);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	/* This is an rx path, so use the rx queue structure */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

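/*
 * Mode 0 transmit: spread the burst over the active slaves in round-robin
 * order, starting one slave past where the previous burst left off.
 */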
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Distribute the packets among the slaves' buffers in round-robin order */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

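/*
 * Transmit hash policies used by the balance and mode 4 paths:
 * xmit_l2_hash hashes the source/destination MACs, xmit_l23_hash combines
 * the MAC hash with the IPv4/IPv6 addresses, and xmit_l34_hash combines
 * the IP addresses with the TCP/UDP ports.
 */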
uint16_t
xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);

	uint32_t hash = ether_hash(eth_hdr);

	return (hash ^= hash >> 8) % slave_count;
}

uint16_t
xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
	uint32_t hash, l3hash = 0;

	hash = ether_hash(eth_hdr);

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv4_hash(ipv4_hdr);

	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);
	}

	hash = hash ^ l3hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

uint16_t
xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
{
	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
	uint16_t proto = eth_hdr->ether_type;
	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);

	struct udp_hdr *udp_hdr = NULL;
	struct tcp_hdr *tcp_hdr = NULL;
	uint32_t hash, l3hash = 0, l4hash = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		size_t ip_hdr_offset;

		l3hash = ipv4_hash(ipv4_hdr);

		/* there is no L4 header in a fragmented packet */
		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
						ip_hdr_offset);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}
	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
				((char *)(eth_hdr + 1) + vlan_offset);
		l3hash = ipv6_hash(ipv6_hdr);

		if (ipv6_hdr->proto == IPPROTO_TCP) {
			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(tcp_hdr);
		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
			l4hash = HASH_L4_PORTS(udp_hdr);
		}
	}

	hash = l3hash ^ l4hash;
	hash ^= hash >> 16;
	hash ^= hash >> 8;

	return hash % slave_count;
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint8_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

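/*
 * Alarm callback rearmed every REORDER_PERIOD_MS for mode 5 (TLB): sample
 * each active slave's transmitted byte count, estimate the bandwidth it
 * has left, and reorder tlb_slaves_order so the least-loaded slave is
 * tried first on the next transmit burst.
 */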
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint8_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint8_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

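/*
 * Mode 5 transmit: walk the slaves in the order computed by the TLB
 * callback, rewriting the source MAC of packets that carry the primary
 * slave's address to that of the transmitting slave.
 */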
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint8_t i, j;

	uint8_t num_of_slaves = internals->active_slave_count;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

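/*
 * Mode 6 transmit: ARP packets are steered by the ALB client table (with
 * the source MAC rewritten to the chosen slave), pending ARP update
 * packets are flushed, and all other traffic falls back to the TLB policy
 * above.
 */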
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In the worst case every packet will be sent through one
	 * port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint8_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move the unsent packets to the end of bufs */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		/* Move the unsent packets to the end of bufs */
		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

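/*
 * Mode 2 transmit: pick an output slave for each packet with the
 * configured xmit_hash policy, then burst each per-slave buffer and move
 * any untransmitted packets to the tail of bufs.
 */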
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;

	int i, op_slave_id;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate the slave mbuf arrays with the packets to be sent on them */
	for (i = 0; i < nb_pkts; i++) {
		/* Select output slave using hash based on xmit policy */
		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);

		/* Populate slave mbuf arrays with mbufs for that slave */
		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += slave_tx_fail_count;
				memcpy(&bufs[nb_pkts - tx_fail_total],
						&slave_bufs[i][num_tx_slave],
						slave_tx_fail_count * sizeof(bufs[0]));
			}

			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint8_t num_of_slaves;
	uint8_t slaves[RTE_MAX_ETHPORTS];
	/* positions in slaves, not ID */
	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
	uint8_t distributing_count;

	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
	uint16_t i, j, op_slave_idx;
	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;

	/* Allocate additional slots for the mode 4 slow protocol packets */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };

	/* Total amount of packets in slave_bufs */
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
	/* Slow packets placed in each slave */
	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);

	distributing_count = 0;
	for (i = 0; i < num_of_slaves; i++) {
		struct port *port = &mode_8023ad_ports[slaves[i]];

		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS,
				NULL);
		slave_nb_pkts[i] = slave_slow_nb_pkts[i];

		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
			slave_bufs[i][j] = slow_pkts[j];

		if (ACTOR_STATE(port, DISTRIBUTING))
			distributing_offsets[distributing_count++] = i;
	}

	if (likely(distributing_count > 0)) {
		/* Populate the slave mbuf arrays with the packets to be sent on them */
		for (i = 0; i < nb_pkts; i++) {
			/* Select output slave using hash based on xmit policy */
			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);

			/* Populate slave mbuf arrays with mbufs for that slave. Use only
			 * slaves that are currently distributing. */
			uint8_t slave_offset = distributing_offsets[op_slave_idx];
			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
			slave_nb_pkts[slave_offset]++;
		}
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] == 0)
			continue;

		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				slave_bufs[i], slave_nb_pkts[i]);

		/* If tx burst fails drop slow packets */
		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
			uint16_t j = nb_pkts - num_tx_fail_total;
			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
				bufs[j] = slave_bufs[i][num_tx_slave];
		}
	}

	return num_tx_total;
}

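/*
 * Mode 3 transmit: duplicate every packet to all active slaves by bumping
 * the mbuf reference count, then free the extra references for any slave
 * that transmitted fewer packets than the most successful one.
 */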
1328 static uint16_t
1329 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1330                 uint16_t nb_pkts)
1331 {
1332         struct bond_dev_private *internals;
1333         struct bond_tx_queue *bd_tx_q;
1334
1335         uint8_t tx_failed_flag = 0, num_of_slaves;
1336         uint8_t slaves[RTE_MAX_ETHPORTS];
1337
1338         uint16_t max_nb_of_tx_pkts = 0;
1339
1340         int slave_tx_total[RTE_MAX_ETHPORTS];
1341         int i, most_successful_tx_slave = -1;
1342
1343         bd_tx_q = (struct bond_tx_queue *)queue;
1344         internals = bd_tx_q->dev_private;
1345
1346         /* Copy slave list to protect against slave up/down changes during tx
1347          * bursting */
1348         num_of_slaves = internals->active_slave_count;
1349         memcpy(slaves, internals->active_slaves,
1350                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1351
1352         if (num_of_slaves < 1)
1353                 return 0;
1354
1355         /* Increment reference count on mbufs */
1356         for (i = 0; i < nb_pkts; i++)
1357                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1358
1359         /* Transmit burst on each active slave */
1360         for (i = 0; i < num_of_slaves; i++) {
1361                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1362                                         bufs, nb_pkts);
1363
1364                 if (unlikely(slave_tx_total[i] < nb_pkts))
1365                         tx_failed_flag = 1;
1366
1367                 /* record the value and slave index for the slave which transmits the
1368                  * maximum number of packets */
1369                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1370                         max_nb_of_tx_pkts = slave_tx_total[i];
1371                         most_successful_tx_slave = i;
1372                 }
1373         }
1374
1375         /* if slaves fail to transmit packets from burst, the calling application
1376          * is not expected to know about multiple references to packets so we must
1377          * handle failures of all packets except those of the most successful slave
1378          */
1379         if (unlikely(tx_failed_flag))
1380                 for (i = 0; i < num_of_slaves; i++)
1381                         if (i != most_successful_tx_slave)
1382                                 while (slave_tx_total[i] < nb_pkts)
1383                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1384
1385         return max_nb_of_tx_pkts;
1386 }
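/*
 * Editor's sketch (not part of the driver): broadcast mode avoids copying
 * by reference counting. With N slaves, each mbuf's refcnt is raised by
 * N - 1 so every slave's transmit (or the explicit free on failure)
 * releases exactly one reference:
 *
 *     struct rte_mbuf *m = rte_pktmbuf_alloc(mp);      // assume mempool mp
 *     rte_mbuf_refcnt_update(m, N - 1);                // N owners in total
 *     for (i = 0; i < N; i++)
 *             if (rte_eth_tx_burst(i, 0, &m, 1) == 0)
 *                     rte_pktmbuf_free(m);             // drop this owner
 *
 * The buffer returns to its pool only once the count reaches zero.
 */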
1387
1388 void
1389 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1390 {
1391         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1392
1393         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1394                 /**
1395                  * If in mode 4 then save the link properties of the first
1396                  * slave; all subsequent slaves must match these properties
1397                  */
1398                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1399
1400                 bond_link->link_autoneg = slave_link->link_autoneg;
1401                 bond_link->link_duplex = slave_link->link_duplex;
1402                 bond_link->link_speed = slave_link->link_speed;
1403         } else {
1404                 /**
1405                  * In any other mode the link properties are set to default
1406                  * values of AUTONEG/DUPLEX
1407                  */
1408                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1409                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1410         }
1411 }
1412
1413 int
1414 link_properties_valid(struct rte_eth_dev *ethdev,
1415                 struct rte_eth_link *slave_link)
1416 {
1417         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1418
1419         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1420                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1421
1422                 if (bond_link->link_duplex != slave_link->link_duplex ||
1423                         bond_link->link_autoneg != slave_link->link_autoneg ||
1424                         bond_link->link_speed != slave_link->link_speed)
1425                         return -1;
1426         }
1427
1428         return 0;
1429 }
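/*
 * Usage sketch (hypothetical caller, for illustration only): when a slave
 * is attached in mode 4 the bonding code records the first slave's link
 * properties and checks every later slave against them:
 *
 *     struct rte_eth_link link;
 *     rte_eth_link_get_nowait(slave_port_id, &link);
 *     if (internals->slave_count == 0)
 *             link_properties_set(bonded_ethdev, &link);
 *     else if (link_properties_valid(bonded_ethdev, &link) != 0)
 *             return -1;      // speed/duplex/autoneg mismatch
 */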
1430
1431 int
1432 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1433 {
1434         struct ether_addr *mac_addr;
1435
1436         if (eth_dev == NULL) {
1437                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1438                 return -1;
1439         }
1440
1441         if (dst_mac_addr == NULL) {
1442                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1443                 return -1;
1444         }
1445
1446         mac_addr = eth_dev->data->mac_addrs;
1447
1448         ether_addr_copy(mac_addr, dst_mac_addr);
1449         return 0;
1450 }
1451
1452 int
1453 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1454 {
1455         struct ether_addr *mac_addr;
1456
1457         if (eth_dev == NULL) {
1458                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1459                 return -1;
1460         }
1461
1462         if (new_mac_addr == NULL) {
1463                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1464                 return -1;
1465         }
1466
1467         mac_addr = eth_dev->data->mac_addrs;
1468
1469         /* If new MAC is different to current MAC then update */
1470         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1471                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1472
1473         return 0;
1474 }
1475
1476 int
1477 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1478 {
1479         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1480         int i;
1481
1482         /* Update slave devices MAC addresses */
1483         if (internals->slave_count < 1)
1484                 return -1;
1485
1486         switch (internals->mode) {
1487         case BONDING_MODE_ROUND_ROBIN:
1488         case BONDING_MODE_BALANCE:
1489         case BONDING_MODE_BROADCAST:
1490                 for (i = 0; i < internals->slave_count; i++) {
1491                         if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1492                                         bonded_eth_dev->data->mac_addrs)) {
1493                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1494                                                 internals->slaves[i].port_id);
1495                                 return -1;
1496                         }
1497                 }
1498                 break;
1499         case BONDING_MODE_8023AD:
1500                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1501                 break;
1502         case BONDING_MODE_ACTIVE_BACKUP:
1503         case BONDING_MODE_TLB:
1504         case BONDING_MODE_ALB:
1505         default:
1506                 for (i = 0; i < internals->slave_count; i++) {
1507                         if (internals->slaves[i].port_id ==
1508                                         internals->current_primary_port) {
1509                                 if (mac_address_set(&rte_eth_devices[internals->primary_port],
1510                                                 bonded_eth_dev->data->mac_addrs)) {
1511                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1512                                                         internals->current_primary_port);
1513                                         return -1;
1514                                 }
1515                         } else {
1516                                 if (mac_address_set(
1517                                                 &rte_eth_devices[internals->slaves[i].port_id],
1518                                                 &internals->slaves[i].persisted_mac_addr)) {
1519                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1520                                                         internals->slaves[i].port_id);
1521                                         return -1;
1522                                 }
1523                         }
1524                 }
1525         }
1526
1527         return 0;
1528 }
1529
1530 int
1531 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1532 {
1533         struct bond_dev_private *internals;
1534
1535         internals = eth_dev->data->dev_private;
1536
1537         switch (mode) {
1538         case BONDING_MODE_ROUND_ROBIN:
1539                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1540                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1541                 break;
1542         case BONDING_MODE_ACTIVE_BACKUP:
1543                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1544                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1545                 break;
1546         case BONDING_MODE_BALANCE:
1547                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1548                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1549                 break;
1550         case BONDING_MODE_BROADCAST:
1551                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1552                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1553                 break;
1554         case BONDING_MODE_8023AD:
1555                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1556                         return -1;
1557
1558                 if (internals->mode4.dedicated_queues.enabled == 0) {
1559                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1560                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1561                         RTE_LOG(WARNING, PMD,
1562                                 "Using mode 4, it is necessary to do TX burst "
1563                                 "and RX burst at least every 100ms.\n");
1564                 } else {
1565                         /* Use flow director's optimization */
1566                         eth_dev->rx_pkt_burst =
1567                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1568                         eth_dev->tx_pkt_burst =
1569                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1570                 }
1571                 break;
1572         case BONDING_MODE_TLB:
1573                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1574                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575                 break;
1576         case BONDING_MODE_ALB:
1577                 if (bond_mode_alb_enable(eth_dev) != 0)
1578                         return -1;
1579
1580                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1581                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1582                 break;
1583         default:
1584                 return -1;
1585         }
1586
1587         internals->mode = mode;
1588
1589         return 0;
1590 }
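/*
 * Application-side sketch: the mode (and hence the rx/tx burst handlers
 * installed above) is normally selected through the public API, e.g.:
 *
 *     int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD, 0);
 *     rte_eth_bond_slave_add(port, slave_port0);
 *     rte_eth_bond_slave_add(port, slave_port1);
 *
 * rte_eth_bond_mode_set() can likewise switch the mode of an existing
 * bonded device before it is (re)started.
 */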
1591
1592
1593 static int
1594 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1595                 struct rte_eth_dev *slave_eth_dev)
1596 {
1597         int errval = 0;
1598         struct bond_dev_private *internals = (struct bond_dev_private *)
1599                 bonded_eth_dev->data->dev_private;
1600         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1601
1602         if (port->slow_pool == NULL) {
1603                 char mem_name[256];
1604                 int slave_id = slave_eth_dev->data->port_id;
1605
1606                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1607                                 slave_id);
1608                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1609                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1610                         slave_eth_dev->data->numa_node);
1611
1612                 /* Any memory allocation failure in initialization is critical because
1613                  * resources cannot be freed, so reinitialization is impossible. */
1614                 if (port->slow_pool == NULL) {
1615                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1616                                 slave_id, mem_name, rte_strerror(rte_errno));
1617                 }
1618         }
1619
1620         if (internals->mode4.dedicated_queues.enabled == 1) {
1621                 /* Configure slow Rx queue */
1622
1623                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1624                                 internals->mode4.dedicated_queues.rx_qid, 128,
1625                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1626                                 NULL, port->slow_pool);
1627                 if (errval != 0) {
1628                         RTE_BOND_LOG(ERR,
1629                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1630                                         slave_eth_dev->data->port_id,
1631                                         internals->mode4.dedicated_queues.rx_qid,
1632                                         errval);
1633                         return errval;
1634                 }
1635
1636                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1637                                 internals->mode4.dedicated_queues.tx_qid, 512,
1638                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1639                                 NULL);
1640                 if (errval != 0) {
1641                         RTE_BOND_LOG(ERR,
1642                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1643                                 slave_eth_dev->data->port_id,
1644                                 internals->mode4.dedicated_queues.tx_qid,
1645                                 errval);
1646                         return errval;
1647                 }
1648         }
1649         return 0;
1650 }
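/*
 * Editor's note (sketch): the dedicated slow queues above are configured
 * only when the application opted in before starting the bonded port:
 *
 *     rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id);
 *
 * LACPDUs then use an extra rx/tx queue pair kept out of the data path,
 * steered to it by the rte_flow rule installed in slave_configure().
 */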
1651
1652 int
1653 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1654                 struct rte_eth_dev *slave_eth_dev)
1655 {
1656         struct bond_rx_queue *bd_rx_q;
1657         struct bond_tx_queue *bd_tx_q;
1658         uint16_t nb_rx_queues;
1659         uint16_t nb_tx_queues;
1660
1661         int errval;
1662         uint16_t q_id;
1663         struct rte_flow_error flow_error;
1664
1665         struct bond_dev_private *internals = (struct bond_dev_private *)
1666                 bonded_eth_dev->data->dev_private;
1667
1668         /* Stop slave */
1669         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1670
1671         /* Enable interrupts on slave device if supported */
1672         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1673                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1674
1675         /* If RSS is enabled for bonding, try to enable it for slaves  */
1676         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1677                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1678                                 != 0) {
1679                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1680                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1681                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1682                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1683                 } else {
1684                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1685                 }
1686
1687                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1688                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1689                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1690                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1691         }
1692
1693         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1694                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1695
1696         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1697         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1698
1699         if (internals->mode == BONDING_MODE_8023AD) {
1700                 if (internals->mode4.dedicated_queues.enabled == 1) {
1701                         nb_rx_queues++;
1702                         nb_tx_queues++;
1703                 }
1704         }
1705
1706         /* Configure device */
1707         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1708                         nb_rx_queues, nb_tx_queues,
1709                         &(slave_eth_dev->data->dev_conf));
1710         if (errval != 0) {
1711                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1712                                 slave_eth_dev->data->port_id, errval);
1713                 return errval;
1714         }
1715
1716         /* Setup Rx Queues */
1717         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1718                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1719
1720                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1721                                 bd_rx_q->nb_rx_desc,
1722                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1723                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1724                 if (errval != 0) {
1725                         RTE_BOND_LOG(ERR,
1726                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1727                                         slave_eth_dev->data->port_id, q_id, errval);
1728                         return errval;
1729                 }
1730         }
1731
1732         /* Setup Tx Queues */
1733         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1734                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1735
1736                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1737                                 bd_tx_q->nb_tx_desc,
1738                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1739                                 &bd_tx_q->tx_conf);
1740                 if (errval != 0) {
1741                         RTE_BOND_LOG(ERR,
1742                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1743                                 slave_eth_dev->data->port_id, q_id, errval);
1744                         return errval;
1745                 }
1746         }
1747
1748         if (internals->mode == BONDING_MODE_8023AD &&
1749                         internals->mode4.dedicated_queues.enabled == 1) {
1750                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1751                 if (errval != 0)
1752                         return errval;
1753
1754                 errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1755                                 slave_eth_dev->data->port_id);
1756                 if (errval != 0) {
1757                         RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1758                                 slave_eth_dev->data->port_id, errval);
1759                         return errval;
1760                 }
1761
1762                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1763                         rte_flow_destroy(slave_eth_dev->data->port_id,
1764                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1765                                         &flow_error);
1766
1767                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1768                                 slave_eth_dev->data->port_id);
1769         }
1770
1771         /* Start device */
1772         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1773         if (errval != 0) {
1774                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1775                                 slave_eth_dev->data->port_id, errval);
1776                 return -1;
1777         }
1778
1779         /* If RSS is enabled for bonding, synchronize RETA */
1780         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1781                 int i;
1782                 struct bond_dev_private *internals;
1783
1784                 internals = bonded_eth_dev->data->dev_private;
1785
1786                 for (i = 0; i < internals->slave_count; i++) {
1787                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1788                                 errval = rte_eth_dev_rss_reta_update(
1789                                                 slave_eth_dev->data->port_id,
1790                                                 &internals->reta_conf[0],
1791                                                 internals->slaves[i].reta_size);
1792                                 if (errval != 0) {
1793                                         RTE_LOG(WARNING, PMD,
1794                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1795                                                         " RSS Configuration for bonding may be inconsistent.\n",
1796                                                         slave_eth_dev->data->port_id, errval);
1797                                 }
1798                                 break;
1799                         }
1800                 }
1801         }
1802
1803         /* If lsc interrupt is set, check initial slave's link status */
1804         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1805                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1806                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1807                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1808                         NULL);
1809         }
1810
1811         return 0;
1812 }
1813
1814 void
1815 slave_remove(struct bond_dev_private *internals,
1816                 struct rte_eth_dev *slave_eth_dev)
1817 {
1818         uint8_t i;
1819
1820         for (i = 0; i < internals->slave_count; i++)
1821                 if (internals->slaves[i].port_id ==
1822                                 slave_eth_dev->data->port_id)
1823                         break;
1824
1825         if (i < (internals->slave_count - 1))
1826                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1827                                 sizeof(internals->slaves[0]) *
1828                                 (internals->slave_count - i - 1));
1829
1830         internals->slave_count--;
1831
1832         /* force reconfiguration of slave interfaces */
1833         _rte_eth_dev_reset(slave_eth_dev);
1834 }
1835
1836 static void
1837 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1838
1839 void
1840 slave_add(struct bond_dev_private *internals,
1841                 struct rte_eth_dev *slave_eth_dev)
1842 {
1843         struct bond_slave_details *slave_details =
1844                         &internals->slaves[internals->slave_count];
1845
1846         slave_details->port_id = slave_eth_dev->data->port_id;
1847         slave_details->last_link_status = 0;
1848
1849         /* Mark slave devices that don't support interrupts so we can
1850          * compensate when we start the bond
1851          */
1852         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1853                 slave_details->link_status_poll_enabled = 1;
1854         }
1855
1856         slave_details->link_status_wait_to_complete = 0;
1857         /* Save the slave's original MAC address so it can be restored later */
1858         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1859                         sizeof(struct ether_addr));
1860 }
1861
1862 void
1863 bond_ethdev_primary_set(struct bond_dev_private *internals,
1864                 uint8_t slave_port_id)
1865 {
1866         int i;
1867
1868         if (internals->active_slave_count < 1)
1869                 internals->current_primary_port = slave_port_id;
1870         else
1871                 /* Search bonded device slave ports for new proposed primary port */
1872                 for (i = 0; i < internals->active_slave_count; i++) {
1873                         if (internals->active_slaves[i] == slave_port_id)
1874                                 internals->current_primary_port = slave_port_id;
1875                 }
1876 }
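/*
 * Usage sketch: applications pick the primary slave through the public
 * wrapper, which validates the port ids before ending up here:
 *
 *     if (rte_eth_bond_primary_set(bond_port_id, slave_port_id) != 0)
 *             RTE_LOG(ERR, USER1, "failed to set primary slave\n");
 */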
1877
1878 static void
1879 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1880
1881 static int
1882 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1883 {
1884         struct bond_dev_private *internals;
1885         int i;
1886
1887         /* slave eth dev will be started by bonded device */
1888         if (check_for_bonded_ethdev(eth_dev)) {
1889                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1890                                 eth_dev->data->port_id);
1891                 return -1;
1892         }
1893
1894         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1895         eth_dev->data->dev_started = 1;
1896
1897         internals = eth_dev->data->dev_private;
1898
1899         if (internals->slave_count == 0) {
1900                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1901                 return -1;
1902         }
1903
1904         if (internals->user_defined_mac == 0) {
1905                 struct ether_addr *new_mac_addr = NULL;
1906
1907                 for (i = 0; i < internals->slave_count; i++)
1908                         if (internals->slaves[i].port_id == internals->primary_port)
1909                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1910
1911                 if (new_mac_addr == NULL)
1912                         return -1;
1913
1914                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1915                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1916                                         eth_dev->data->port_id);
1917                         return -1;
1918                 }
1919         }
1920
1921         /* Update all slave devices' MAC addresses */
1922         if (mac_address_slaves_update(eth_dev) != 0)
1923                 return -1;
1924
1925         /* If bonded device is configured in promiscuous mode then re-apply config */
1926         if (internals->promiscuous_en)
1927                 bond_ethdev_promiscuous_enable(eth_dev);
1928
1929         if (internals->mode == BONDING_MODE_8023AD) {
1930                 if (internals->mode4.dedicated_queues.enabled == 1) {
1931                         internals->mode4.dedicated_queues.rx_qid =
1932                                         eth_dev->data->nb_rx_queues;
1933                         internals->mode4.dedicated_queues.tx_qid =
1934                                         eth_dev->data->nb_tx_queues;
1935                 }
1936         }
1937
1938
1939         /* Reconfigure each slave device if starting bonded device */
1940         for (i = 0; i < internals->slave_count; i++) {
1941                 struct rte_eth_dev *slave_ethdev =
1942                                 &(rte_eth_devices[internals->slaves[i].port_id]);
1943                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
1944                         RTE_BOND_LOG(ERR,
1945                                 "bonded port (%d) failed to reconfigure slave device (%d)",
1946                                 eth_dev->data->port_id,
1947                                 internals->slaves[i].port_id);
1948                         return -1;
1949                 }
1950                 /* We will need to poll for link status if any slave doesn't
1951                  * support interrupts
1952                  */
1953                 if (internals->slaves[i].link_status_poll_enabled)
1954                         internals->link_status_polling_enabled = 1;
1955         }
1956         /* start polling if needed */
1957         if (internals->link_status_polling_enabled) {
1958                 rte_eal_alarm_set(
1959                         internals->link_status_polling_interval_ms * 1000,
1960                         bond_ethdev_slave_link_status_change_monitor,
1961                         (void *)&rte_eth_devices[internals->port_id]);
1962         }
1963
1964         if (internals->user_defined_primary_port)
1965                 bond_ethdev_primary_set(internals, internals->primary_port);
1966
1967         if (internals->mode == BONDING_MODE_8023AD)
1968                 bond_mode_8023ad_start(eth_dev);
1969
1970         if (internals->mode == BONDING_MODE_TLB ||
1971                         internals->mode == BONDING_MODE_ALB)
1972                 bond_tlb_enable(internals);
1973
1974         return 0;
1975 }
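/*
 * Typical application-side bring-up reaching this function (sketch; one
 * rx/tx queue pair and a mempool "mp" are assumed):
 *
 *     rte_eth_dev_configure(bond_port_id, 1, 1, &port_conf);
 *     rte_eth_rx_queue_setup(bond_port_id, 0, 128, SOCKET_ID_ANY, NULL, mp);
 *     rte_eth_tx_queue_setup(bond_port_id, 0, 512, SOCKET_ID_ANY, NULL);
 *     rte_eth_dev_start(bond_port_id);    // slaves are reconfigured here
 */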
1976
1977 static void
1978 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1979 {
1980         uint8_t i;
1981
1982         if (dev->data->rx_queues != NULL) {
1983                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1984                         rte_free(dev->data->rx_queues[i]);
1985                         dev->data->rx_queues[i] = NULL;
1986                 }
1987                 dev->data->nb_rx_queues = 0;
1988         }
1989
1990         if (dev->data->tx_queues != NULL) {
1991                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1992                         rte_free(dev->data->tx_queues[i]);
1993                         dev->data->tx_queues[i] = NULL;
1994                 }
1995                 dev->data->nb_tx_queues = 0;
1996         }
1997 }
1998
1999 void
2000 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2001 {
2002         struct bond_dev_private *internals = eth_dev->data->dev_private;
2003         uint8_t i;
2004
2005         if (internals->mode == BONDING_MODE_8023AD) {
2006                 struct port *port;
2007                 void *pkt = NULL;
2008
2009                 bond_mode_8023ad_stop(eth_dev);
2010
2011                 /* Discard all messages to/from mode 4 state machines */
2012                 for (i = 0; i < internals->active_slave_count; i++) {
2013                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2014
2015                         RTE_ASSERT(port->rx_ring != NULL);
2016                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2017                                 rte_pktmbuf_free(pkt);
2018
2019                         RTE_ASSERT(port->tx_ring != NULL);
2020                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2021                                 rte_pktmbuf_free(pkt);
2022                 }
2023         }
2024
2025         if (internals->mode == BONDING_MODE_TLB ||
2026                         internals->mode == BONDING_MODE_ALB) {
2027                 bond_tlb_disable(internals);
2028                 for (i = 0; i < internals->active_slave_count; i++)
2029                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2030         }
2031
2032         internals->active_slave_count = 0;
2033         internals->link_status_polling_enabled = 0;
2034         for (i = 0; i < internals->slave_count; i++)
2035                 internals->slaves[i].last_link_status = 0;
2036
2037         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2038         eth_dev->data->dev_started = 0;
2039 }
2040
2041 void
2042 bond_ethdev_close(struct rte_eth_dev *dev)
2043 {
2044         struct bond_dev_private *internals = dev->data->dev_private;
2045         uint8_t bond_port_id = internals->port_id;
2046         int skipped = 0;
2047
2048         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2049         while (internals->slave_count != skipped) {
2050                 uint8_t port_id = internals->slaves[skipped].port_id;
2051
2052                 rte_eth_dev_stop(port_id);
2053
2054                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2055                         RTE_LOG(ERR, PMD,
2056                                 "Failed to remove port %d from bonded device "
2057                                 "%s\n", port_id, dev->device->name);
2058                         skipped++;
2059                 }
2060         }
2061         bond_ethdev_free_queues(dev);
2062         rte_bitmap_reset(internals->vlan_filter_bmp);
2063 }
2064
2065 /* forward declaration */
2066 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2067
2068 static void
2069 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2070 {
2071         struct bond_dev_private *internals = dev->data->dev_private;
2072
2073         uint16_t max_nb_rx_queues = UINT16_MAX;
2074         uint16_t max_nb_tx_queues = UINT16_MAX;
2075
2076         dev_info->max_mac_addrs = 1;
2077
2078         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2079                         internals->candidate_max_rx_pktlen :
2080                         ETHER_MAX_JUMBO_FRAME_LEN;
2081
2082         /* Max number of tx/rx queues that the bonded device can support is the
2083          * minimum value across the bonded slaves, as all slaves must be capable
2084          * of supporting the same number of tx/rx queues.
2085          */
2086         if (internals->slave_count > 0) {
2087                 struct rte_eth_dev_info slave_info;
2088                 uint8_t idx;
2089
2090                 for (idx = 0; idx < internals->slave_count; idx++) {
2091                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2092                                         &slave_info);
2093
2094                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2095                                 max_nb_rx_queues = slave_info.max_rx_queues;
2096
2097                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2098                                 max_nb_tx_queues = slave_info.max_tx_queues;
2099                 }
2100         }
2101
2102         dev_info->max_rx_queues = max_nb_rx_queues;
2103         dev_info->max_tx_queues = max_nb_tx_queues;
2104
2105         /**
2106          * If dedicated hw queues enabled for link bonding device in LACP mode
2107          * then we need to reduce the maximum number of data path queues by 1.
2108          */
2109         if (internals->mode == BONDING_MODE_8023AD &&
2110                 internals->mode4.dedicated_queues.enabled == 1) {
2111                 dev_info->max_rx_queues--;
2112                 dev_info->max_tx_queues--;
2113         }
2114
2115         dev_info->min_rx_bufsize = 0;
2116
2117         dev_info->rx_offload_capa = internals->rx_offload_capa;
2118         dev_info->tx_offload_capa = internals->tx_offload_capa;
2119         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2120
2121         dev_info->reta_size = internals->reta_size;
2122 }
2123
2124 static int
2125 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2126 {
2127         int res;
2128         uint8_t i;
2129         struct bond_dev_private *internals = dev->data->dev_private;
2130
2131         /* don't do this while a slave is being added */
2132         rte_spinlock_lock(&internals->lock);
2133
2134         if (on)
2135                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2136         else
2137                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2138
2139         for (i = 0; i < internals->slave_count; i++) {
2140                 uint8_t port_id = internals->slaves[i].port_id;
2141
2142                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2143                 if (res == -ENOTSUP)
2144                         RTE_LOG(WARNING, PMD,
2145                                 "Setting VLAN filter on slave port %u not supported.\n",
2146                                 port_id);
2147         }
2148
2149         rte_spinlock_unlock(&internals->lock);
2150         return 0;
2151 }
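/*
 * Example caller (sketch): enabling VLAN 100 on the bonded port fans the
 * filter out to every slave that supports it:
 *
 *     rte_eth_dev_vlan_filter(bond_port_id, 100, 1);
 */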
2152
2153 static int
2154 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2155                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2156                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2157 {
2158         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2159                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2160                                         0, dev->data->numa_node);
2161         if (bd_rx_q == NULL)
2162                 return -1;
2163
2164         bd_rx_q->queue_id = rx_queue_id;
2165         bd_rx_q->dev_private = dev->data->dev_private;
2166
2167         bd_rx_q->nb_rx_desc = nb_rx_desc;
2168
2169         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2170         bd_rx_q->mb_pool = mb_pool;
2171
2172         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2173
2174         return 0;
2175 }
2176
2177 static int
2178 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2179                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2180                 const struct rte_eth_txconf *tx_conf)
2181 {
2182         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2183                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2184                                         0, dev->data->numa_node);
2185
2186         if (bd_tx_q == NULL)
2187                 return -1;
2188
2189         bd_tx_q->queue_id = tx_queue_id;
2190         bd_tx_q->dev_private = dev->data->dev_private;
2191
2192         bd_tx_q->nb_tx_desc = nb_tx_desc;
2193         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2194
2195         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2196
2197         return 0;
2198 }
2199
2200 static void
2201 bond_ethdev_rx_queue_release(void *queue)
2202 {
2203         if (queue == NULL)
2204                 return;
2205
2206         rte_free(queue);
2207 }
2208
2209 static void
2210 bond_ethdev_tx_queue_release(void *queue)
2211 {
2212         if (queue == NULL)
2213                 return;
2214
2215         rte_free(queue);
2216 }
2217
2218 static void
2219 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2220 {
2221         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2222         struct bond_dev_private *internals;
2223
2224         /* Default value for polling slave found is true as we don't want to
2225          * disable the polling thread if we cannot get the lock */
2226         int i, polling_slave_found = 1;
2227
2228         if (cb_arg == NULL)
2229                 return;
2230
2231         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2232         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2233
2234         if (!bonded_ethdev->data->dev_started ||
2235                 !internals->link_status_polling_enabled)
2236                 return;
2237
2238         /* If device is currently being configured then don't check slaves link
2239          * status, wait until next period */
2240         if (rte_spinlock_trylock(&internals->lock)) {
2241                 if (internals->slave_count > 0)
2242                         polling_slave_found = 0;
2243
2244                 for (i = 0; i < internals->slave_count; i++) {
2245                         if (!internals->slaves[i].link_status_poll_enabled)
2246                                 continue;
2247
2248                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2249                         polling_slave_found = 1;
2250
2251                         /* Update slave link status */
2252                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2253                                         internals->slaves[i].link_status_wait_to_complete);
2254
2255                         /* if link status has changed since last checked then call lsc
2256                          * event callback */
2257                         if (slave_ethdev->data->dev_link.link_status !=
2258                                         internals->slaves[i].last_link_status) {
2259                                 internals->slaves[i].last_link_status =
2260                                                 slave_ethdev->data->dev_link.link_status;
2261
2262                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2263                                                 RTE_ETH_EVENT_INTR_LSC,
2264                                                 &bonded_ethdev->data->port_id,
2265                                                 NULL);
2266                         }
2267                 }
2268                 rte_spinlock_unlock(&internals->lock);
2269         }
2270
2271         if (polling_slave_found)
2272                 /* Set alarm to continue monitoring link status of slave ethdev's */
2273                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2274                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2275 }
2276
2277 static int
2278 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2279 {
2280         void (*link_update)(uint8_t port_id, struct rte_eth_link *eth_link);
2281
2282         struct bond_dev_private *bond_ctx;
2283         struct rte_eth_link slave_link;
2284
2285         uint32_t idx;
2286
2287         bond_ctx = ethdev->data->dev_private;
2288
2289         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2290
2291         if (ethdev->data->dev_started == 0 ||
2292                         bond_ctx->active_slave_count == 0) {
2293                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2294                 return 0;
2295         }
2296
2297         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2298
2299         if (wait_to_complete)
2300                 link_update = rte_eth_link_get;
2301         else
2302                 link_update = rte_eth_link_get_nowait;
2303
2304         switch (bond_ctx->mode) {
2305         case BONDING_MODE_BROADCAST:
2306                 /**
2307                  * Seed the link speed with UINT32_MAX so the first slave queried
2308                  * below initializes the running minimum
2309                  */
2310                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2311
2312                 /**
2313                  * link speed is the minimum of all the slaves' link speeds, as
2314                  * packet loss would occur on the slowest slave if transmission
2315                  * at a higher rate were attempted
2316                  */
2317                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2318                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2319
2320                         if (slave_link.link_speed <
2321                                         ethdev->data->dev_link.link_speed)
2322                                 ethdev->data->dev_link.link_speed =
2323                                                 slave_link.link_speed;
2324                 }
2325                 break;
2326         case BONDING_MODE_ACTIVE_BACKUP:
2327                 /* Current primary slave */
2328                 link_update(bond_ctx->current_primary_port, &slave_link);
2329
2330                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2331                 break;
2332         case BONDING_MODE_8023AD:
2333                 ethdev->data->dev_link.link_autoneg =
2334                                 bond_ctx->mode4.slave_link.link_autoneg;
2335                 ethdev->data->dev_link.link_duplex =
2336                                 bond_ctx->mode4.slave_link.link_duplex;
2337                 /* fall through to update link speed */
2338         case BONDING_MODE_ROUND_ROBIN:
2339         case BONDING_MODE_BALANCE:
2340         case BONDING_MODE_TLB:
2341         case BONDING_MODE_ALB:
2342         default:
2343                 /**
2344                  * In these modes the maximum theoretical link speed is the sum
2345                  * of all the slaves' link speeds
2346                  */
2347                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2348
2349                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2350                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2351
2352                         ethdev->data->dev_link.link_speed +=
2353                                         slave_link.link_speed;
2354                 }
2355         }
2356
2357
2358         return 0;
2359 }
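/*
 * From the application's side (sketch), the aggregate properties computed
 * above are what the generic link query reports for the bonded port:
 *
 *     struct rte_eth_link link;
 *     rte_eth_link_get_nowait(bond_port_id, &link);
 *     printf("bond speed: %u Mbps\n", link.link_speed);
 */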
2360
2361
2362 static void
2363 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2364 {
2365         struct bond_dev_private *internals = dev->data->dev_private;
2366         struct rte_eth_stats slave_stats;
2367         int i, j;
2368
2369         for (i = 0; i < internals->slave_count; i++) {
2370                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2371
2372                 stats->ipackets += slave_stats.ipackets;
2373                 stats->opackets += slave_stats.opackets;
2374                 stats->ibytes += slave_stats.ibytes;
2375                 stats->obytes += slave_stats.obytes;
2376                 stats->imissed += slave_stats.imissed;
2377                 stats->ierrors += slave_stats.ierrors;
2378                 stats->oerrors += slave_stats.oerrors;
2379                 stats->rx_nombuf += slave_stats.rx_nombuf;
2380
2381                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2382                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2383                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2384                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2385                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2386                         stats->q_errors[j] += slave_stats.q_errors[j];
2387                 }
2388
2389         }
2390 }
2391
2392 static void
2393 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2394 {
2395         struct bond_dev_private *internals = dev->data->dev_private;
2396         int i;
2397
2398         for (i = 0; i < internals->slave_count; i++)
2399                 rte_eth_stats_reset(internals->slaves[i].port_id);
2400 }
2401
2402 static void
2403 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2404 {
2405         struct bond_dev_private *internals = eth_dev->data->dev_private;
2406         int i;
2407
2408         internals->promiscuous_en = 1;
2409
2410         switch (internals->mode) {
2411         /* Promiscuous mode is propagated to all slaves */
2412         case BONDING_MODE_ROUND_ROBIN:
2413         case BONDING_MODE_BALANCE:
2414         case BONDING_MODE_BROADCAST:
2415                 for (i = 0; i < internals->slave_count; i++)
2416                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2417                 break;
2418         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2419         case BONDING_MODE_8023AD:
2420                 break;
2421         /* Promiscuous mode is propagated only to primary slave */
2422         case BONDING_MODE_ACTIVE_BACKUP:
2423         case BONDING_MODE_TLB:
2424         case BONDING_MODE_ALB:
2425         default:
2426                 rte_eth_promiscuous_enable(internals->current_primary_port);
2427         }
2428 }
2429
2430 static void
2431 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2432 {
2433         struct bond_dev_private *internals = dev->data->dev_private;
2434         int i;
2435
2436         internals->promiscuous_en = 0;
2437
2438         switch (internals->mode) {
2439         /* Promiscuous mode is propagated to all slaves */
2440         case BONDING_MODE_ROUND_ROBIN:
2441         case BONDING_MODE_BALANCE:
2442         case BONDING_MODE_BROADCAST:
2443                 for (i = 0; i < internals->slave_count; i++)
2444                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2445                 break;
2446         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2447         case BONDING_MODE_8023AD:
2448                 break;
2449         /* Promiscuous mode is propagated only to primary slave */
2450         case BONDING_MODE_ACTIVE_BACKUP:
2451         case BONDING_MODE_TLB:
2452         case BONDING_MODE_ALB:
2453         default:
2454                 rte_eth_promiscuous_disable(internals->current_primary_port);
2455         }
2456 }
2457
2458 static void
2459 bond_ethdev_delayed_lsc_propagation(void *arg)
2460 {
2461         if (arg == NULL)
2462                 return;
2463
2464         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2465                         RTE_ETH_EVENT_INTR_LSC, NULL, NULL);
2466 }
2467
2468 int
2469 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
2470                 void *param, void *ret_param __rte_unused)
2471 {
2472         struct rte_eth_dev *bonded_eth_dev;
2473         struct bond_dev_private *internals;
2474         struct rte_eth_link link;
2475         int rc = -1;
2476
2477         int i, valid_slave = 0;
2478         uint8_t active_pos;
2479         uint8_t lsc_flag = 0;
2480
2481         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2482                 return rc;
2483
2484         bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
2485
2486         if (check_for_bonded_ethdev(bonded_eth_dev))
2487                 return rc;
2488
2489         internals = bonded_eth_dev->data->dev_private;
2490
2491         /* If the device isn't started don't handle interrupts */
2492         if (!bonded_eth_dev->data->dev_started)
2493                 return rc;
2494
2495         /* verify that port_id is a valid slave of bonded port */
2496         for (i = 0; i < internals->slave_count; i++) {
2497                 if (internals->slaves[i].port_id == port_id) {
2498                         valid_slave = 1;
2499                         break;
2500                 }
2501         }
2502
2503         if (!valid_slave)
2504                 return rc;
2505
2506         /* Search for port in active port list */
2507         active_pos = find_slave_by_id(internals->active_slaves,
2508                         internals->active_slave_count, port_id);
2509
2510         rte_eth_link_get_nowait(port_id, &link);
2511         if (link.link_status) {
2512                 if (active_pos < internals->active_slave_count)
2513                         return rc;
2514
2515                 /* if no active slave ports then set this port to be primary port */
2516                 if (internals->active_slave_count < 1) {
2517                         /* If first active slave, then change link status */
2518                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2519                         internals->current_primary_port = port_id;
2520                         lsc_flag = 1;
2521
2522                         mac_address_slaves_update(bonded_eth_dev);
2523                 }
2524
2525                 activate_slave(bonded_eth_dev, port_id);
2526
2527                 /* If user has defined the primary port then default to using it */
2528                 if (internals->user_defined_primary_port &&
2529                                 internals->primary_port == port_id)
2530                         bond_ethdev_primary_set(internals, port_id);
2531         } else {
2532                 if (active_pos == internals->active_slave_count)
2533                         return rc;
2534
2535                 /* Remove from active slave list */
2536                 deactivate_slave(bonded_eth_dev, port_id);
2537
2538                 if (internals->active_slave_count < 1)
2539                         lsc_flag = 1;
2540
2541                 /* Update primary id, take first active slave from list or if none
2542                  * available fall back to the configured primary port */
2543                 if (port_id == internals->current_primary_port) {
2544                         if (internals->active_slave_count > 0)
2545                                 bond_ethdev_primary_set(internals,
2546                                                 internals->active_slaves[0]);
2547                         else
2548                                 internals->current_primary_port = internals->primary_port;
2549                 }
2550         }
2551
2552         /**
2553          * Update bonded device link properties after any change to active
2554          * slaves
2555          */
2556         bond_ethdev_link_update(bonded_eth_dev, 0);
2557
2558         if (lsc_flag) {
2559                 /* Cancel any possible outstanding interrupts if delays are enabled */
2560                 if (internals->link_up_delay_ms > 0 ||
2561                         internals->link_down_delay_ms > 0)
2562                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2563                                         bonded_eth_dev);
2564
2565                 if (bonded_eth_dev->data->dev_link.link_status) {
2566                         if (internals->link_up_delay_ms > 0)
2567                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2568                                                 bond_ethdev_delayed_lsc_propagation,
2569                                                 (void *)bonded_eth_dev);
2570                         else
2571                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2572                                                 RTE_ETH_EVENT_INTR_LSC,
2573                                                 NULL, NULL);
2574
2575                 } else {
2576                         if (internals->link_down_delay_ms > 0)
2577                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2578                                                 bond_ethdev_delayed_lsc_propagation,
2579                                                 (void *)bonded_eth_dev);
2580                         else
2581                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2582                                                 RTE_ETH_EVENT_INTR_LSC,
2583                                                 NULL, NULL);
2584                 }
2585         }
2586         return 0;
2587 }
2588
2589 static int
2590 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2591                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2592 {
2593         unsigned i, j;
2594         int result = 0;
2595         int slave_reta_size;
2596         unsigned reta_count;
2597         struct bond_dev_private *internals = dev->data->dev_private;
2598
2599         if (reta_size != internals->reta_size)
2600                 return -EINVAL;
2601
2602         /* Copy RETA table */
2603         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2604
2605         for (i = 0; i < reta_count; i++) {
2606                 internals->reta_conf[i].mask = reta_conf[i].mask;
2607                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2608                         if ((reta_conf[i].mask >> j) & 0x01)
2609                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2610         }
2611
2612         /* Fill rest of array */
2613         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2614                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2615                                 sizeof(internals->reta_conf[0]) * reta_count);
2616
2617         /* Propagate RETA over slaves */
2618         for (i = 0; i < internals->slave_count; i++) {
2619                 slave_reta_size = internals->slaves[i].reta_size;
2620                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2621                                 &internals->reta_conf[0], slave_reta_size);
2622                 if (result < 0)
2623                         return result;
2624         }
2625
2626         return 0;
2627 }
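/*
 * Example caller (sketch, assuming a RETA size of 128 and two rx queues):
 *
 *     struct rte_eth_rss_reta_entry64 conf[128 / RTE_RETA_GROUP_SIZE];
 *     for (i = 0; i < 128 / RTE_RETA_GROUP_SIZE; i++) {
 *             conf[i].mask = UINT64_MAX;
 *             for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
 *                     conf[i].reta[j] = (i * RTE_RETA_GROUP_SIZE + j) % 2;
 *     }
 *     rte_eth_dev_rss_reta_update(bond_port_id, conf, 128);
 *
 * The update is mirrored onto every slave by the function above.
 */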
2628
2629 static int
2630 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2631                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2632 {
2633         int i, j;
2634         struct bond_dev_private *internals = dev->data->dev_private;
2635
2636         if (reta_size != internals->reta_size)
2637                 return -EINVAL;
2638
2639         /* Copy RETA table */
2640         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2641                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2642                         if ((reta_conf[i].mask >> j) & 0x01)
2643                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2644
2645         return 0;
2646 }
2647
2648 static int
2649 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2650                 struct rte_eth_rss_conf *rss_conf)
2651 {
2652         int i, result = 0;
2653         struct bond_dev_private *internals = dev->data->dev_private;
2654         struct rte_eth_rss_conf bond_rss_conf;
2655
2656         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2657
2658         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2659
2660         if (bond_rss_conf.rss_hf != 0)
2661                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2662
2663         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2664                         sizeof(internals->rss_key)) {
2665                 if (bond_rss_conf.rss_key_len == 0)
2666                         bond_rss_conf.rss_key_len = 40; /* default 40-byte key */
2667                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2668                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2669                                 internals->rss_key_len);
2670         }
2671
2672         for (i = 0; i < internals->slave_count; i++) {
2673                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2674                                 &bond_rss_conf);
2675                 if (result < 0)
2676                         return result;
2677         }
2678
2679         return 0;
2680 }
2681
2682 static int
2683 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2684                 struct rte_eth_rss_conf *rss_conf)
2685 {
2686         struct bond_dev_private *internals = dev->data->dev_private;
2687
2688         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2689         rss_conf->rss_key_len = internals->rss_key_len;
2690         if (rss_conf->rss_key)
2691                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2692
2693         return 0;
2694 }
2695
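/* ethdev callbacks exported by the bonding PMD; the rx/tx burst
 * handlers are not listed here because bond_ethdev_mode_set() installs
 * mode-specific ones. */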
2696 const struct eth_dev_ops default_dev_ops = {
2697         .dev_start            = bond_ethdev_start,
2698         .dev_stop             = bond_ethdev_stop,
2699         .dev_close            = bond_ethdev_close,
2700         .dev_configure        = bond_ethdev_configure,
2701         .dev_infos_get        = bond_ethdev_info,
2702         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2703         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2704         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2705         .rx_queue_release     = bond_ethdev_rx_queue_release,
2706         .tx_queue_release     = bond_ethdev_tx_queue_release,
2707         .link_update          = bond_ethdev_link_update,
2708         .stats_get            = bond_ethdev_stats_get,
2709         .stats_reset          = bond_ethdev_stats_reset,
2710         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2711         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2712         .reta_update          = bond_ethdev_rss_reta_update,
2713         .reta_query           = bond_ethdev_rss_reta_query,
2714         .rss_hash_update      = bond_ethdev_rss_hash_update,
2715         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2716 };
2717
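/*
 * Allocate and initialise a bonded ethdev: reserve the ethdev entry,
 * set conservative defaults (no primary port, layer-2 hashing, link
 * status polling disabled), apply the requested bonding mode and
 * create the VLAN filter bitmap.
 */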
2718 static int
2719 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2720 {
2721         const char *name = rte_vdev_device_name(dev);
2722         uint8_t socket_id = dev->device.numa_node;
2723         struct bond_dev_private *internals = NULL;
2724         struct rte_eth_dev *eth_dev = NULL;
2725         uint32_t vlan_filter_bmp_size;
2726
2727         /* Now do all data allocation: the eth_dev structure, dummy PCI
2728          * driver and internal (private) data.
2729          */
2730
2731         /* reserve an ethdev entry */
2732         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2733         if (eth_dev == NULL) {
2734                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2735                 goto err;
2736         }
2737
2738         internals = eth_dev->data->dev_private;
2739         eth_dev->data->nb_rx_queues = (uint16_t)1;
2740         eth_dev->data->nb_tx_queues = (uint16_t)1;
2741
2742         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2743                         socket_id);
2744         if (eth_dev->data->mac_addrs == NULL) {
2745                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2746                 goto err;
2747         }
2748
2749         eth_dev->dev_ops = &default_dev_ops;
2750         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
2751                 RTE_ETH_DEV_DETACHABLE;
2752
2753         rte_spinlock_init(&internals->lock);
2754
2755         internals->port_id = eth_dev->data->port_id;
2756         internals->mode = BONDING_MODE_INVALID;
2757         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2758         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2759         internals->xmit_hash = xmit_l2_hash;
2760         internals->user_defined_mac = 0;
2761
2762         internals->link_status_polling_enabled = 0;
2763
2764         internals->link_status_polling_interval_ms =
2765                 DEFAULT_POLLING_INTERVAL_10_MS;
2766         internals->link_down_delay_ms = 0;
2767         internals->link_up_delay_ms = 0;
2768
2769         internals->slave_count = 0;
2770         internals->active_slave_count = 0;
2771         internals->rx_offload_capa = 0;
2772         internals->tx_offload_capa = 0;
2773         internals->candidate_max_rx_pktlen = 0;
2774         internals->max_rx_pktlen = 0;
2775
2776         /* Initially allow to choose any offload type */
2777         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2778
2779         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2780         memset(internals->slaves, 0, sizeof(internals->slaves));
2781
2782         /* Set mode 4 default configuration */
2783         bond_mode_8023ad_setup(eth_dev, NULL);
2784         if (bond_ethdev_mode_set(eth_dev, mode)) {
2785                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2786                                  eth_dev->data->port_id, mode);
2787                 goto err;
2788         }
2789
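        /* Reserve one bit per VLAN id (0..ETHER_MAX_VLAN_ID) so that
         * filters configured on the bonded port can be replayed onto
         * slaves when they are (re)configured. */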
2790         vlan_filter_bmp_size =
2791                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2792         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2793                                                    RTE_CACHE_LINE_SIZE);
2794         if (internals->vlan_filter_bmpmem == NULL) {
2795                 RTE_BOND_LOG(ERR,
2796                              "Failed to allocate vlan bitmap for bonded device %u",
2797                              eth_dev->data->port_id);
2798                 goto err;
2799         }
2800
2801         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2802                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2803         if (internals->vlan_filter_bmp == NULL) {
2804                 RTE_BOND_LOG(ERR,
2805                              "Failed to init vlan bitmap for bonded device %u",
2806                              eth_dev->data->port_id);
2807                 rte_free(internals->vlan_filter_bmpmem);
2808                 goto err;
2809         }
2810
2811         return eth_dev->data->port_id;
2812
2813 err:
2814         rte_free(internals);
2815         if (eth_dev != NULL) {
2816                 rte_free(eth_dev->data->mac_addrs);
2817                 rte_eth_dev_release_port(eth_dev);
2818         }
2819         return -1;
2820 }
2821
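/*
 * vdev probe entry point: "mode" is mandatory, "socket_id" optional
 * (defaulting to the caller's socket). The equivalent API-based
 * creation (a sketch with error handling omitted; slave_port_id is a
 * placeholder) would be:
 *
 *   int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_BALANCE,
 *                                  rte_socket_id());
 *   rte_eth_bond_slave_add(port, slave_port_id);
 *   rte_eth_bond_primary_set(port, slave_port_id);
 */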
2822 static int
2823 bond_probe(struct rte_vdev_device *dev)
2824 {
2825         const char *name;
2826         struct bond_dev_private *internals;
2827         struct rte_kvargs *kvlist;
2828         uint8_t bonding_mode, socket_id;
2829         int  arg_count, port_id;
2830
2831         if (!dev)
2832                 return -EINVAL;
2833
2834         name = rte_vdev_device_name(dev);
2835         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2836
2837         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
2838                 pmd_bond_init_valid_arguments);
2839         if (kvlist == NULL)
2840                 return -1;
2841
2842         /* Parse link bonding mode */
2843         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2844                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2845                                 &bond_ethdev_parse_slave_mode_kvarg,
2846                                 &bonding_mode) != 0) {
2847                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2848                                         name);
2849                         goto parse_error;
2850                 }
2851         } else {
2852                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2853                                 "device %s\n", name);
2854                 goto parse_error;
2855         }
2856
2857         /* Parse socket id to create bonding device on */
2858         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2859         if (arg_count == 1) {
2860                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2861                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
2862                                 != 0) {
2863                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2864                                         "bonded device %s\n", name);
2865                         goto parse_error;
2866                 }
2867         } else if (arg_count > 1) {
2868                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2869                                 "bonded device %s\n", name);
2870                 goto parse_error;
2871         } else {
2872                 socket_id = rte_socket_id();
2873         }
2874
2875         dev->device.numa_node = socket_id;
2876
2877         /* Create link bonding eth device */
2878         port_id = bond_alloc(dev, bonding_mode);
2879         if (port_id < 0) {
2880                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2881                                 "socket %u.\n", name, bonding_mode, socket_id);
2882                 goto parse_error;
2883         }
2884         internals = rte_eth_devices[port_id].data->dev_private;
2885         internals->kvlist = kvlist;
2886
2887         RTE_LOG(INFO, EAL, "Created bonded device %s on port %d in mode %u on "
2888                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
2889         return 0;
2890
2891 parse_error:
2892         rte_kvargs_free(kvlist);
2893
2894         return -1;
2895 }
2896
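/*
 * Tear-down counterpart of bond_probe(): refuses to remove a bonded
 * device that still has slaves attached (-EBUSY) and stops/closes a
 * started port before releasing its resources.
 */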
2897 static int
2898 bond_remove(struct rte_vdev_device *dev)
2899 {
2900         struct rte_eth_dev *eth_dev;
2901         struct bond_dev_private *internals;
2902         const char *name;
2903
2904         if (!dev)
2905                 return -EINVAL;
2906
2907         name = rte_vdev_device_name(dev);
2908         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2909
2910         /* Now free all allocated data: the eth_dev structure,
2911          * dummy PCI driver and internal (private) data.
2912          */
2913
2914         /* find an ethdev entry */
2915         eth_dev = rte_eth_dev_allocated(name);
2916         if (eth_dev == NULL)
2917                 return -ENODEV;
2918
2919         RTE_ASSERT(eth_dev->device == &dev->device);
2920
2921         internals = eth_dev->data->dev_private;
2922         if (internals->slave_count != 0)
2923                 return -EBUSY;
2924
2925         if (eth_dev->data->dev_started == 1) {
2926                 bond_ethdev_stop(eth_dev);
2927                 bond_ethdev_close(eth_dev);
2928         }
2929
2930         eth_dev->dev_ops = NULL;
2931         eth_dev->rx_pkt_burst = NULL;
2932         eth_dev->tx_pkt_burst = NULL;
2933
2935         rte_bitmap_free(internals->vlan_filter_bmp);
2936         rte_free(internals->vlan_filter_bmpmem);
2937         rte_free(eth_dev->data->dev_private);
2938         rte_free(eth_dev->data->mac_addrs);
2939
2940         rte_eth_dev_release_port(eth_dev);
2941
2942         return 0;
2943 }
2944
2945 /* This part resolves the slave port ids after all the other pdevs and vdevs
2946  * have been allocated. */
2947 static int
2948 bond_ethdev_configure(struct rte_eth_dev *dev)
2949 {
2950         const char *name = dev->device->name;
2951         struct bond_dev_private *internals = dev->data->dev_private;
2952         struct rte_kvargs *kvlist = internals->kvlist;
2953         int arg_count;
2954         uint8_t port_id = dev - rte_eth_devices;
2955         uint8_t agg_mode;
2956
2957         static const uint8_t default_rss_key[40] = {
2958                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2959                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2960                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2961                 0xBE, 0xAC, 0x01, 0xFA
2962         };
2963
2964         unsigned i, j;
2965
2966         /* If RSS is enabled, fill table and key with default values */
2967         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2968                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2969                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2970                 memcpy(internals->rss_key, default_rss_key, 40);
2971
2972                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2973                         internals->reta_conf[i].mask = ~0LL;
2974                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2975                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2976                 }
2977         }
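        /*
         * With the defaults above, nb_rx_queues == 4 for example yields
         * the redirection pattern 0,1,2,3,0,1,2,3,... over the whole
         * table, i.e. an even spread of flows over the rx queues.
         */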
2978
2979         /* set the max_rx_pktlen */
2980         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2981
2982         /*
2983          * if no kvlist, it means that this bonded device has been created
2984          * through the bonding api.
2985          */
2986         if (!kvlist)
2987                 return 0;
2988
2989         /* Parse MAC address for bonded device */
2990         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2991         if (arg_count == 1) {
2992                 struct ether_addr bond_mac;
2993
2994                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2995                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2996                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2997                                         name);
2998                         return -1;
2999                 }
3000
3001                 /* Set MAC address */
3002                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3003                         RTE_LOG(ERR, EAL,
3004                                         "Failed to set mac address on bonded device %s\n",
3005                                         name);
3006                         return -1;
3007                 }
3008         } else if (arg_count > 1) {
3009                 RTE_LOG(ERR, EAL,
3010                                 "MAC address can be specified only once for bonded device %s\n",
3011                                 name);
3012                 return -1;
3013         }
3014
3015         /* Parse/set balance mode transmit policy */
3016         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3017         if (arg_count == 1) {
3018                 uint8_t xmit_policy;
3019
3020                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3021                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3022                                                 0) {
3023                         RTE_LOG(INFO, EAL,
3024                                         "Invalid xmit policy specified for bonded device %s\n",
3025                                         name);
3026                         return -1;
3027                 }
3028
3029                 /* Set balance mode transmit policy */
3030                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3031                         RTE_LOG(ERR, EAL,
3032                                         "Failed to set balance xmit policy on bonded device %s\n",
3033                                         name);
3034                         return -1;
3035                 }
3036         } else if (arg_count > 1) {
3037                 RTE_LOG(ERR, EAL,
3038                                 "Transmit policy can be specified only once for bonded device"
3039                                 " %s\n", name);
3040                 return -1;
3041         }
3042
3043         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3044                 if (rte_kvargs_process(kvlist,
3045                                 PMD_BOND_AGG_MODE_KVARG,
3046                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3047                                 &agg_mode) != 0) {
3048                         RTE_LOG(ERR, EAL,
3049                                         "Failed to parse agg selection mode for bonded device %s\n",
3050                                         name);
                        return -1;
3051                 }
3052                 if (internals->mode == BONDING_MODE_8023AD)
3053                         if (agg_mode != 0)
3054                                 rte_eth_bond_8023ad_agg_selection_set(port_id,
3055                                                 agg_mode);
3056         }
3057
3058         /* Parse/add slave ports to bonded device */
3059         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3060                 struct bond_ethdev_slave_ports slave_ports;
3061                 unsigned i;
3062
3063                 memset(&slave_ports, 0, sizeof(slave_ports));
3064
3065                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3066                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3067                         RTE_LOG(ERR, EAL,
3068                                         "Failed to parse slave ports for bonded device %s\n",
3069                                         name);
3070                         return -1;
3071                 }
3072
3073                 for (i = 0; i < slave_ports.slave_count; i++) {
3074                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3075                                 RTE_LOG(ERR, EAL,
3076                                                 "Failed to add port %d as slave to bonded device %s\n",
3077                                                 slave_ports.slaves[i], name);
3078                         }
3079                 }
3080
3081         } else {
3082                 RTE_LOG(ERR, EAL, "No slaves specified for bonded device %s\n", name);
3083                 return -1;
3084         }
3085
3086         /* Parse/set primary slave port id*/
3087         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3088         if (arg_count == 1) {
3089                 uint8_t primary_slave_port_id;
3090
3091                 if (rte_kvargs_process(kvlist,
3092                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3093                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3094                                 &primary_slave_port_id) < 0) {
3095                         RTE_LOG(INFO, EAL,
3096                                         "Invalid primary slave port id specified for bonded device"
3097                                         " %s\n", name);
3098                         return -1;
3099                 }
3100
3101                 /* Set primary slave port id */
3102                 if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
3103                                 != 0) {
3104                         RTE_LOG(ERR, EAL,
3105                                         "Failed to set primary slave port %d on bonded device %s\n",
3106                                         primary_slave_port_id, name);
3107                         return -1;
3108                 }
3109         } else if (arg_count > 1) {
3110                 RTE_LOG(INFO, EAL,
3111                                 "Primary slave can be specified only once for bonded device"
3112                                 " %s\n", name);
3113                 return -1;
3114         }
3115
3116         /* Parse link status monitor polling interval */
3117         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3118         if (arg_count == 1) {
3119                 uint32_t lsc_poll_interval_ms;
3120
3121                 if (rte_kvargs_process(kvlist,
3122                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3123                                 &bond_ethdev_parse_time_ms_kvarg,
3124                                 &lsc_poll_interval_ms) < 0) {
3125                         RTE_LOG(INFO, EAL,
3126                                         "Invalid lsc polling interval value specified for bonded"
3127                                         " device %s\n", name);
3128                         return -1;
3129                 }
3130
3131                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3132                                 != 0) {
3133                         RTE_LOG(ERR, EAL,
3134                                         "Failed to set lsc monitor polling interval (%u ms) on"
3135                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3136                         return -1;
3137                 }
3138         } else if (arg_count > 1) {
3139                 RTE_LOG(INFO, EAL,
3140                                 "LSC polling interval can be specified only once for bonded"
3141                                 " device %s\n", name);
3142                 return -1;
3143         }
3144
3145         /* Parse link up interrupt propagation delay */
3146         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3147         if (arg_count == 1) {
3148                 uint32_t link_up_delay_ms;
3149
3150                 if (rte_kvargs_process(kvlist,
3151                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3152                                 &bond_ethdev_parse_time_ms_kvarg,
3153                                 &link_up_delay_ms) < 0) {
3154                         RTE_LOG(INFO, EAL,
3155                                         "Invalid link up propagation delay value specified for"
3156                                         " bonded device %s\n", name);
3157                         return -1;
3158                 }
3159
3160                 /* Set link up propagation delay */
3161                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3162                                 != 0) {
3163                         RTE_LOG(ERR, EAL,
3164                                         "Failed to set link up propagation delay (%u ms) on bonded"
3165                                         " device %s\n", link_up_delay_ms, name);
3166                         return -1;
3167                 }
3168         } else if (arg_count > 1) {
3169                 RTE_LOG(INFO, EAL,
3170                                 "Link up propagation delay can be specified only once for"
3171                                 " bonded device %s\n", name);
3172                 return -1;
3173         }
3174
3175         /* Parse link down interrupt propagation delay */
3176         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3177         if (arg_count == 1) {
3178                 uint32_t link_down_delay_ms;
3179
3180                 if (rte_kvargs_process(kvlist,
3181                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3182                                 &bond_ethdev_parse_time_ms_kvarg,
3183                                 &link_down_delay_ms) < 0) {
3184                         RTE_LOG(INFO, EAL,
3185                                         "Invalid link down propagation delay value specified for"
3186                                         " bonded device %s\n", name);
3187                         return -1;
3188                 }
3189
3190                 /* Set link down propagation delay */
3191                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3192                                 != 0) {
3193                         RTE_LOG(ERR, EAL,
3194                                         "Failed to set link down propagation delay (%u ms) on"
3195                                         " bonded device %s\n", link_down_delay_ms, name);
3196                         return -1;
3197                 }
3198         } else if (arg_count > 1) {
3199                 RTE_LOG(INFO, EAL,
3200                                 "Link down propagation delay can be specified only once for"
3201                                 " bonded device %s\n", name);
3202                 return -1;
3203         }
3204
3205         return 0;
3206 }
3207
3208 struct rte_vdev_driver pmd_bond_drv = {
3209         .probe = bond_probe,
3210         .remove = bond_remove,
3211 };
3212
3213 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3214 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3215
3216 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3217         "slave=<ifc> "
3218         "primary=<ifc> "
3219         "mode=[0-6] "
3220         "xmit_policy=[l2 | l23 | l34] "
3221         "agg_mode=[count | stable | bandwidth] "
3222         "socket_id=<int> "
3223         "mac=<mac addr> "
3224         "lsc_poll_period_ms=<int> "
3225         "up_delay=<int> "
3226         "down_delay=<int>");
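/*
 * Illustrative invocation (the PCI addresses are placeholders):
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:00:04.0,slave=0000:00:05.0,xmit_policy=l34'
 *
 * "mode" and "socket_id" are consumed by bond_probe(); the remaining
 * kvargs are applied by bond_ethdev_configure() once the slave port
 * ids can be resolved.
 */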