deb_dpdk.git: drivers/net/bonding/rte_eth_bond_pmd.c (new upstream version 18.08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 #define BOND_MAX_MAC_ADDRS 16
29
30 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
31
32 /* Table for statistics in mode 5 TLB */
33 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
34
35 static inline size_t
36 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
37 {
38         size_t vlan_offset = 0;
39
40         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
41                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
42
43                 vlan_offset = sizeof(struct vlan_hdr);
44                 *proto = vlan_hdr->eth_proto;
45
46                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
47                         vlan_hdr = vlan_hdr + 1;
48                         *proto = vlan_hdr->eth_proto;
49                         vlan_offset += sizeof(struct vlan_hdr);
50                 }
51         }
52         return vlan_offset;
53 }
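/*
 * Illustrative sketch, not part of the upstream driver: how a caller can
 * combine get_vlan_offset() with the Ethernet header to reach the IPv4
 * header behind up to two VLAN tags. The helper name is hypothetical.
 */
static __rte_unused struct ipv4_hdr *
example_ipv4_hdr_after_vlans(struct ether_hdr *eth_hdr)
{
        uint16_t proto = eth_hdr->ether_type;
        size_t offset = get_vlan_offset(eth_hdr, &proto);

        if (proto != rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                return NULL;    /* not IPv4 behind the (optional) VLAN tags */

        return (struct ipv4_hdr *)((char *)(eth_hdr + 1) + offset);
}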
54
55 static uint16_t
56 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
57 {
58         struct bond_dev_private *internals;
59
60         uint16_t num_rx_slave = 0;
61         uint16_t num_rx_total = 0;
62
63         int i;
64
65         /* Cast to structure containing the bonded device's port id and queue id */
66         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
67
68         internals = bd_rx_q->dev_private;
69
70
71         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
72                 /* The offset into *bufs advances as packets are received
73                  * from successive slaves */
74                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
75                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
76                 if (num_rx_slave) {
77                         num_rx_total += num_rx_slave;
78                         nb_pkts -= num_rx_slave;
79                 }
80         }
81
82         return num_rx_total;
83 }
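/*
 * Illustrative sketch, not part of the driver: an application receives from
 * the bonded port exactly as from any other ethdev; the mode-specific burst
 * handler above runs underneath rte_eth_rx_burst(). The port and queue ids
 * here are hypothetical.
 */
static __rte_unused void
example_poll_bonded_port(uint16_t bonded_port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[32];
        uint16_t nb_rx = rte_eth_rx_burst(bonded_port_id, queue_id, pkts, 32);

        while (nb_rx > 0)               /* consume and release the burst */
                rte_pktmbuf_free(pkts[--nb_rx]);
}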
84
85 static uint16_t
86 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
87                 uint16_t nb_pkts)
88 {
89         struct bond_dev_private *internals;
90
91         /* Cast to structure containing the bonded device's port id and queue id */
92         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
93
94         internals = bd_rx_q->dev_private;
95
96         return rte_eth_rx_burst(internals->current_primary_port,
97                         bd_rx_q->queue_id, bufs, nb_pkts);
98 }
99
100 static inline uint8_t
101 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
102 {
103         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
104
105         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
106                 (ethertype == ether_type_slow_be &&
107                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
108 }
109
110 /*****************************************************************************
111  * Flow director's setup for mode 4 optimization
112  */
113
114 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
115         .dst.addr_bytes = { 0 },
116         .src.addr_bytes = { 0 },
117         .type = RTE_BE16(ETHER_TYPE_SLOW),
118 };
119
120 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
121         .dst.addr_bytes = { 0 },
122         .src.addr_bytes = { 0 },
123         .type = 0xFFFF,
124 };
125
126 static struct rte_flow_item flow_item_8023ad[] = {
127         {
128                 .type = RTE_FLOW_ITEM_TYPE_ETH,
129                 .spec = &flow_item_eth_type_8023ad,
130                 .last = NULL,
131                 .mask = &flow_item_eth_mask_type_8023ad,
132         },
133         {
134                 .type = RTE_FLOW_ITEM_TYPE_END,
135                 .spec = NULL,
136                 .last = NULL,
137                 .mask = NULL,
138         }
139 };
140
141 const struct rte_flow_attr flow_attr_8023ad = {
142         .group = 0,
143         .priority = 0,
144         .ingress = 1,
145         .egress = 0,
146         .reserved = 0,
147 };
148
149 int
150 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
151                 uint16_t slave_port) {
152         struct rte_eth_dev_info slave_info;
153         struct rte_flow_error error;
154         struct bond_dev_private *internals = (struct bond_dev_private *)
155                         (bond_dev->data->dev_private);
156
157         const struct rte_flow_action_queue lacp_queue_conf = {
158                 .index = 0,
159         };
160
161         const struct rte_flow_action actions[] = {
162                 {
163                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
164                         .conf = &lacp_queue_conf
165                 },
166                 {
167                         .type = RTE_FLOW_ACTION_TYPE_END,
168                 }
169         };
170
171         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
172                         flow_item_8023ad, actions, &error);
173         if (ret < 0) {
174                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
175                                 __func__, error.message, slave_port,
176                                 internals->mode4.dedicated_queues.rx_qid);
177                 return -1;
178         }
179
180         rte_eth_dev_info_get(slave_port, &slave_info);
181         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
182                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
183                 RTE_BOND_LOG(ERR,
184                         "%s: Slave %d capabilities do not allow allocating additional queues",
185                         __func__, slave_port);
186                 return -1;
187         }
188
189         return 0;
190 }
191
192 int
193 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
194         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
195         struct bond_dev_private *internals = (struct bond_dev_private *)
196                         (bond_dev->data->dev_private);
197         struct rte_eth_dev_info bond_info;
198         uint16_t idx;
199
200         /* Verify that all slaves in the bonding device support the flow rule and can provide the extra dedicated queues */
201         if (internals->slave_count > 0) {
202                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
203
204                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
205                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
206
207                 for (idx = 0; idx < internals->slave_count; idx++) {
208                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
209                                         internals->slaves[idx].port_id) != 0)
210                                 return -1;
211                 }
212         }
213
214         return 0;
215 }
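/*
 * Illustrative sketch, not part of the driver: an application requests the
 * mode 4 dedicated-queue optimization before starting the bonded device; the
 * verification above runs as part of that request (the public API may also
 * perform it internally). The helper name is hypothetical.
 */
static __rte_unused int
example_enable_hw_lacp_filter(uint16_t bonded_port_id)
{
        if (bond_8023ad_slow_pkt_hw_filter_supported(bonded_port_id) != 0)
                return -1;      /* fall back to software LACP filtering */

        return rte_eth_bond_8023ad_dedicated_queues_enable(bonded_port_id);
}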
216
217 int
218 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
219
220         struct rte_flow_error error;
221         struct bond_dev_private *internals = (struct bond_dev_private *)
222                         (bond_dev->data->dev_private);
223
224         struct rte_flow_action_queue lacp_queue_conf = {
225                 .index = internals->mode4.dedicated_queues.rx_qid,
226         };
227
228         const struct rte_flow_action actions[] = {
229                 {
230                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
231                         .conf = &lacp_queue_conf
232                 },
233                 {
234                         .type = RTE_FLOW_ACTION_TYPE_END,
235                 }
236         };
237
238         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
239                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
240         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
241                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
242                                 "(slave_port=%d queue_id=%d)",
243                                 error.message, slave_port,
244                                 internals->mode4.dedicated_queues.rx_qid);
245                 return -1;
246         }
247
248         return 0;
249 }
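/*
 * Illustrative sketch, not part of the driver: undoing the rule created by
 * bond_ethdev_8023ad_flow_set(), e.g. when dedicated queues are disabled or a
 * slave is removed. The helper name is hypothetical and error handling is
 * elided for brevity.
 */
static __rte_unused void
example_8023ad_flow_unset(struct bond_dev_private *internals,
                uint16_t slave_port)
{
        struct rte_flow_error error;

        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL)
                return;

        rte_flow_destroy(slave_port,
                        internals->mode4.dedicated_queues.flow[slave_port], &error);
        internals->mode4.dedicated_queues.flow[slave_port] = NULL;
}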
250
251 static uint16_t
252 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
253                 uint16_t nb_pkts)
254 {
255         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
256         struct bond_dev_private *internals = bd_rx_q->dev_private;
257         uint16_t num_rx_total = 0;      /* Total number of received packets */
258         uint16_t slaves[RTE_MAX_ETHPORTS];
259         uint16_t slave_count;
260
261         uint16_t i, idx;
262
263         /* Copy slave list to protect against slave up/down changes during rx
264          * bursting */
265         slave_count = internals->active_slave_count;
266         memcpy(slaves, internals->active_slaves,
267                         sizeof(internals->active_slaves[0]) * slave_count);
268
269         for (i = 0, idx = internals->active_slave;
270                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
271                 idx = idx % slave_count;
272
273                 /* Read packets from this slave */
274                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
275                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
276         }
277
278         internals->active_slave = idx;
279
280         return num_rx_total;
281 }
282
283 static uint16_t
284 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
285                 uint16_t nb_bufs)
286 {
287         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
288         struct bond_dev_private *internals = bd_tx_q->dev_private;
289
290         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
291         uint16_t slave_count;
292
293         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
294         uint16_t dist_slave_count;
295
296         /* 2-D array to sort mbufs for transmission on each slave into */
297         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
298         /* Number of mbufs for transmission on each slave */
299         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
300         /* Mapping array generated by hash function to map mbufs to slaves */
301         uint16_t bufs_slave_port_idxs[nb_bufs];
302
303         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
304         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
305
306         uint16_t i, j;
307
308         if (unlikely(nb_bufs == 0))
309                 return 0;
310
311         /* Copy slave list to protect against slave up/down changes during tx
312          * bursting */
313         slave_count = internals->active_slave_count;
314         if (unlikely(slave_count < 1))
315                 return 0;
316
317         memcpy(slave_port_ids, internals->active_slaves,
318                         sizeof(slave_port_ids[0]) * slave_count);
319
320
321         dist_slave_count = 0;
322         for (i = 0; i < slave_count; i++) {
323                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
324
325                 if (ACTOR_STATE(port, DISTRIBUTING))
326                         dist_slave_port_ids[dist_slave_count++] =
327                                         slave_port_ids[i];
328         }
329
330         if (unlikely(dist_slave_count < 1))
331                 return 0;
332
333         /*
334          * Populate each slave's mbuf array with the packets to be sent on it,
335          * selecting the output slave with a hash computed per the xmit policy
336          */
337         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
338                         bufs_slave_port_idxs);
339
340         for (i = 0; i < nb_bufs; i++) {
341                 /* Populate slave mbuf arrays with mbufs for that slave. */
342                 uint8_t slave_idx = bufs_slave_port_idxs[i];
343
344                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
345         }
346
347
348         /* Send packet burst on each slave device */
349         for (i = 0; i < dist_slave_count; i++) {
350                 if (slave_nb_bufs[i] == 0)
351                         continue;
352
353                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
354                                 bd_tx_q->queue_id, slave_bufs[i],
355                                 slave_nb_bufs[i]);
356
357                 total_tx_count += slave_tx_count;
358
359                 /* If tx burst fails move packets to end of bufs */
360                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
361                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
362                                         slave_tx_count;
363                         total_tx_fail_count += slave_tx_fail_count[i];
364
365                         /*
366                          * Shift bufs to beginning of array to allow reordering
367                          * later
368                          */
369                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
370                                 slave_bufs[i][j] =
371                                         slave_bufs[i][slave_tx_count + j];
372                         }
373                 }
374         }
375
376         /*
377          * If there were tx burst failures we move the failed packets to the end
378          * of bufs to preserve the expected PMD behaviour that all unsent packets
379          * sit at the end of the input mbuf array
380          */
381         if (unlikely(total_tx_fail_count > 0)) {
382                 int bufs_idx = nb_bufs - total_tx_fail_count;
383
384                 for (i = 0; i < slave_count; i++) {
385                         if (slave_tx_fail_count[i] > 0) {
386                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
387                                         bufs[bufs_idx++] = slave_bufs[i][j];
388                         }
389                 }
390         }
391
392         return total_tx_count;
393 }
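/*
 * Illustrative sketch, not part of the driver: because unsent packets are
 * compacted to the tail of bufs, a caller can retry the remainder with plain
 * pointer arithmetic, as with any ethdev tx burst. The helper name is
 * hypothetical.
 */
static __rte_unused void
example_tx_with_retry(uint16_t port_id, uint16_t queue_id,
                struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);

        while (sent < nb_pkts)          /* retry only the unsent tail */
                sent += rte_eth_tx_burst(port_id, queue_id,
                                pkts + sent, nb_pkts - sent);
}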
394
395
396 static uint16_t
397 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
398                 uint16_t nb_pkts)
399 {
400         /* Cast to structure containing the bonded device's port id and queue id */
401         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
402         struct bond_dev_private *internals = bd_rx_q->dev_private;
403         struct ether_addr bond_mac;
404
405         struct ether_hdr *hdr;
406
407         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
408         uint16_t num_rx_total = 0;      /* Total number of received packets */
409         uint16_t slaves[RTE_MAX_ETHPORTS];
410         uint16_t slave_count, idx;
411
412         uint8_t collecting;  /* current slave collecting status */
413         const uint8_t promisc = internals->promiscuous_en;
414         uint8_t i, j, k;
415         uint8_t subtype;
416
417         rte_eth_macaddr_get(internals->port_id, &bond_mac);
418         /* Copy slave list to protect against slave up/down changes during rx
419          * bursting */
420         slave_count = internals->active_slave_count;
421         memcpy(slaves, internals->active_slaves,
422                         sizeof(internals->active_slaves[0]) * slave_count);
423
424         idx = internals->active_slave;
425         if (idx >= slave_count) {
426                 internals->active_slave = 0;
427                 idx = 0;
428         }
429         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
430                 j = num_rx_total;
431                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
432                                          COLLECTING);
433
434                 /* Read packets from this slave */
435                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
436                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
437
438                 for (k = j; k < 2 && k < num_rx_total; k++)
439                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
440
441                 /* Handle slow protocol packets. */
442                 while (j < num_rx_total) {
443
444                         /* A packet classified above pure L2 cannot be a slow frame; skip it */
445                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
446                                 j++;
447                                 continue;
448                         }
449
450                         if (j + 3 < num_rx_total)
451                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
452
453                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
454                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
455
456                         /* Remove the packet if it is a slow packet, if the slave is not in
457                          * collecting state, or if the interface is not in promiscuous mode
458                          * and the destination MAC is neither multicast nor the bond's own. */
459                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
460                                 !collecting || (!promisc &&
461                                         !is_multicast_ether_addr(&hdr->d_addr) &&
462                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
463
464                                 if (hdr->ether_type == ether_type_slow_be) {
465                                         bond_mode_8023ad_handle_slow_pkt(
466                                             internals, slaves[idx], bufs[j]);
467                                 } else
468                                         rte_pktmbuf_free(bufs[j]);
469
470                                 /* Packet is managed by mode 4 or dropped, shift the array */
471                                 num_rx_total--;
472                                 if (j < num_rx_total) {
473                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
474                                                 (num_rx_total - j));
475                                 }
476                         } else
477                                 j++;
478                 }
479                 if (unlikely(++idx == slave_count))
480                         idx = 0;
481         }
482
483         internals->active_slave = idx;
484         return num_rx_total;
485 }
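/*
 * Note: the bond_ethdev_rx_burst_8023ad_fast_queue() variant earlier in this
 * file can skip the per-packet inspection done above, because the rte_flow
 * rules installed by bond_ethdev_8023ad_flow_set() already steer LACP frames
 * to a dedicated slave queue in hardware.
 */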
486
487 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
488 uint32_t burstnumberRX;
489 uint32_t burstnumberTX;
490
491 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
492
493 static void
494 arp_op_name(uint16_t arp_op, char *buf)
495 {
496         switch (arp_op) {
497         case ARP_OP_REQUEST:
498                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
499                 return;
500         case ARP_OP_REPLY:
501                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
502                 return;
503         case ARP_OP_REVREQUEST:
504                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
505                                 "Reverse ARP Request");
506                 return;
507         case ARP_OP_REVREPLY:
508                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
509                                 "Reverse ARP Reply");
510                 return;
511         case ARP_OP_INVREQUEST:
512                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
513                                 "Peer Identify Request");
514                 return;
515         case ARP_OP_INVREPLY:
516                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
517                                 "Peer Identify Reply");
518                 return;
519         default:
520                 break;
521         }
522         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
523         return;
524 }
525 #endif
526 #define MaxIPv4String   16
527 static void
528 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
529 {
530         uint32_t ipv4_addr;
531
532         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
533         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
534                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
535                 ipv4_addr & 0xFF);
536 }
537
538 #define MAX_CLIENTS_NUMBER      128
539 uint8_t active_clients;
540 struct client_stats_t {
541         uint16_t port;
542         uint32_t ipv4_addr;
543         uint32_t ipv4_rx_packets;
544         uint32_t ipv4_tx_packets;
545 };
546 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
547
548 static void
549 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
550 {
551         int i = 0;
552
553         for (; i < MAX_CLIENTS_NUMBER; i++)     {
554                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
555                         /* Just update RX packets number for this client */
556                         if (TXorRXindicator == &burstnumberRX)
557                                 client_stats[i].ipv4_rx_packets++;
558                         else
559                                 client_stats[i].ipv4_tx_packets++;
560                         return;
561                 }
562         }
563         /* New client: insert it into the table and update its stats */
564         if (TXorRXindicator == &burstnumberRX)
565                 client_stats[active_clients].ipv4_rx_packets++;
566         else
567                 client_stats[active_clients].ipv4_tx_packets++;
568         client_stats[active_clients].ipv4_addr = addr;
569         client_stats[active_clients].port = port;
570         active_clients++;
571
572 }
573
574 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
575 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
576         rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
577                 "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
578                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
579                 info,                                                   \
580                 port,                                                   \
581                 eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
582                 eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
583                 eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
584                 src_ip,                                                 \
585                 eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
586                 eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
587                 eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
588                 dst_ip,                                                 \
589                 arp_op, ++burstnumber)
590 #endif
591
592 static void
593 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
594                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
595 {
596         struct ipv4_hdr *ipv4_h;
597 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
598         struct arp_hdr *arp_h;
599         char dst_ip[16];
600         char ArpOp[24];
601         char buf[16];
602 #endif
603         char src_ip[16];
604
605         uint16_t ether_type = eth_h->ether_type;
606         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
607
608 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
609         strlcpy(buf, info, 16);
610 #endif
611
612         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
613                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
614                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
615 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
616                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
617                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
618 #endif
619                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
620         }
621 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
622         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
623                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
624                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
625                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
626                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
627                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
628         }
629 #endif
630 }
631 #endif
632
633 static uint16_t
634 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
635 {
636         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
637         struct bond_dev_private *internals = bd_tx_q->dev_private;
638         struct ether_hdr *eth_h;
639         uint16_t ether_type, offset;
640         uint16_t nb_recv_pkts;
641         int i;
642
643         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
644
645         for (i = 0; i < nb_recv_pkts; i++) {
646                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
647                 ether_type = eth_h->ether_type;
648                 offset = get_vlan_offset(eth_h, &ether_type);
649
650                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
651 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
652                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
653 #endif
654                         bond_mode_alb_arp_recv(eth_h, offset, internals);
655                 }
656 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
657                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
658                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
659 #endif
660         }
661
662         return nb_recv_pkts;
663 }
664
665 static uint16_t
666 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
667                 uint16_t nb_pkts)
668 {
669         struct bond_dev_private *internals;
670         struct bond_tx_queue *bd_tx_q;
671
672         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
673         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
674
675         uint16_t num_of_slaves;
676         uint16_t slaves[RTE_MAX_ETHPORTS];
677
678         uint16_t num_tx_total = 0, num_tx_slave;
679
680         static int slave_idx = 0;
681         int i, cslave_idx = 0, tx_fail_total = 0;
682
683         bd_tx_q = (struct bond_tx_queue *)queue;
684         internals = bd_tx_q->dev_private;
685
686         /* Copy slave list to protect against slave up/down changes during tx
687          * bursting */
688         num_of_slaves = internals->active_slave_count;
689         memcpy(slaves, internals->active_slaves,
690                         sizeof(internals->active_slaves[0]) * num_of_slaves);
691
692         if (num_of_slaves < 1)
693                 return num_tx_total;
694
695         /* Populate each slave's mbuf array with the packets to be sent on it */
696         for (i = 0; i < nb_pkts; i++) {
697                 cslave_idx = (slave_idx + i) % num_of_slaves;
698                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
699         }
700
701         /* increment current slave index so the next call to tx burst starts on the
702          * next slave */
703         slave_idx = ++cslave_idx;
704
705         /* Send packet burst on each slave device */
706         for (i = 0; i < num_of_slaves; i++) {
707                 if (slave_nb_pkts[i] > 0) {
708                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
709                                         slave_bufs[i], slave_nb_pkts[i]);
710
711                         /* if tx burst fails move packets to end of bufs */
712                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
713                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
714
715                                 tx_fail_total += tx_fail_slave;
716
717                                 memcpy(&bufs[nb_pkts - tx_fail_total],
718                                                 &slave_bufs[i][num_tx_slave],
719                                                 tx_fail_slave * sizeof(bufs[0]));
720                         }
721                         num_tx_total += num_tx_slave;
722                 }
723         }
724
725         return num_tx_total;
726 }
727
728 static uint16_t
729 bond_ethdev_tx_burst_active_backup(void *queue,
730                 struct rte_mbuf **bufs, uint16_t nb_pkts)
731 {
732         struct bond_dev_private *internals;
733         struct bond_tx_queue *bd_tx_q;
734
735         bd_tx_q = (struct bond_tx_queue *)queue;
736         internals = bd_tx_q->dev_private;
737
738         if (internals->active_slave_count < 1)
739                 return 0;
740
741         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
742                         bufs, nb_pkts);
743 }
744
745 static inline uint16_t
746 ether_hash(struct ether_hdr *eth_hdr)
747 {
748         unaligned_uint16_t *word_src_addr =
749                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
750         unaligned_uint16_t *word_dst_addr =
751                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
752
753         return (word_src_addr[0] ^ word_dst_addr[0]) ^
754                         (word_src_addr[1] ^ word_dst_addr[1]) ^
755                         (word_src_addr[2] ^ word_dst_addr[2]);
756 }
757
758 static inline uint32_t
759 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
760 {
761         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
762 }
763
764 static inline uint32_t
765 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
766 {
767         unaligned_uint32_t *word_src_addr =
768                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
769         unaligned_uint32_t *word_dst_addr =
770                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
771
772         return (word_src_addr[0] ^ word_dst_addr[0]) ^
773                         (word_src_addr[1] ^ word_dst_addr[1]) ^
774                         (word_src_addr[2] ^ word_dst_addr[2]) ^
775                         (word_src_addr[3] ^ word_dst_addr[3]);
776 }
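/*
 * Worked example (illustrative): hashing src 2001:db8::1 against
 * dst 2001:db8::2, the per-word XORs above cancel everywhere except the last
 * 32-bit word, where 0x00000001 ^ 0x00000002 = 0x00000003; only the bits in
 * which the two addresses differ can influence slave selection.
 */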
777
778
779 void
780 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
781                 uint8_t slave_count, uint16_t *slaves)
782 {
783         struct ether_hdr *eth_hdr;
784         uint32_t hash;
785         int i;
786
787         for (i = 0; i < nb_pkts; i++) {
788                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
789
790                 hash = ether_hash(eth_hdr);
791
792                 slaves[i] = (hash ^= hash >> 8) % slave_count;
793         }
794 }
795
796 void
797 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
798                 uint8_t slave_count, uint16_t *slaves)
799 {
800         uint16_t i;
801         struct ether_hdr *eth_hdr;
802         uint16_t proto;
803         size_t vlan_offset;
804         uint32_t hash, l3hash;
805
806         for (i = 0; i < nb_pkts; i++) {
807                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
808                 l3hash = 0;
809
810                 proto = eth_hdr->ether_type;
811                 hash = ether_hash(eth_hdr);
812
813                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
814
815                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
816                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
817                                         ((char *)(eth_hdr + 1) + vlan_offset);
818                         l3hash = ipv4_hash(ipv4_hdr);
819
820                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
821                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
822                                         ((char *)(eth_hdr + 1) + vlan_offset);
823                         l3hash = ipv6_hash(ipv6_hdr);
824                 }
825
826                 hash = hash ^ l3hash;
827                 hash ^= hash >> 16;
828                 hash ^= hash >> 8;
829
830                 slaves[i] = hash % slave_count;
831         }
832 }
833
834 void
835 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
836                 uint8_t slave_count, uint16_t *slaves)
837 {
838         struct ether_hdr *eth_hdr;
839         uint16_t proto;
840         size_t vlan_offset;
841         int i;
842
843         struct udp_hdr *udp_hdr;
844         struct tcp_hdr *tcp_hdr;
845         uint32_t hash, l3hash, l4hash;
846
847         for (i = 0; i < nb_pkts; i++) {
848                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
849                 proto = eth_hdr->ether_type;
850                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
851                 l3hash = 0;
852                 l4hash = 0;
853
854                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
855                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
856                                         ((char *)(eth_hdr + 1) + vlan_offset);
857                         size_t ip_hdr_offset;
858
859                         l3hash = ipv4_hash(ipv4_hdr);
860
861                         /* there is no L4 header in fragmented packet */
862                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
863                                                                 == 0)) {
864                                 ip_hdr_offset = (ipv4_hdr->version_ihl
865                                         & IPV4_HDR_IHL_MASK) *
866                                         IPV4_IHL_MULTIPLIER;
867
868                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
869                                         tcp_hdr = (struct tcp_hdr *)
870                                                 ((char *)ipv4_hdr +
871                                                         ip_hdr_offset);
872                                         l4hash = HASH_L4_PORTS(tcp_hdr);
873                                 } else if (ipv4_hdr->next_proto_id ==
874                                                                 IPPROTO_UDP) {
875                                         udp_hdr = (struct udp_hdr *)
876                                                 ((char *)ipv4_hdr +
877                                                         ip_hdr_offset);
878                                         l4hash = HASH_L4_PORTS(udp_hdr);
879                                 }
880                         }
881                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
882                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
883                                         ((char *)(eth_hdr + 1) + vlan_offset);
884                         l3hash = ipv6_hash(ipv6_hdr);
885
886                         if (ipv6_hdr->proto == IPPROTO_TCP) {
887                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
888                                 l4hash = HASH_L4_PORTS(tcp_hdr);
889                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
890                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
891                                 l4hash = HASH_L4_PORTS(udp_hdr);
892                         }
893                 }
894
895                 hash = l3hash ^ l4hash;
896                 hash ^= hash >> 16;
897                 hash ^= hash >> 8;
898
899                 slaves[i] = hash % slave_count;
900         }
901 }
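/*
 * Illustrative sketch, not part of the driver: an application chooses which
 * of the three hash functions above is used by the balance and 802.3ad tx
 * paths by setting the transmit policy on the bonded port. The helper name
 * is hypothetical.
 */
static __rte_unused int
example_select_l34_policy(uint16_t bonded_port_id)
{
        return rte_eth_bond_xmit_policy_set(bonded_port_id,
                        BALANCE_XMIT_POLICY_LAYER34);
}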
902
903 struct bwg_slave {
904         uint64_t bwg_left_int;
905         uint64_t bwg_left_remainder;
906         uint8_t slave;
907 };
908
909 void
910 bond_tlb_activate_slave(struct bond_dev_private *internals) {
911         int i;
912
913         for (i = 0; i < internals->active_slave_count; i++) {
914                 tlb_last_obytets[internals->active_slaves[i]] = 0;
915         }
916 }
917
918 static int
919 bandwidth_cmp(const void *a, const void *b)
920 {
921         const struct bwg_slave *bwg_a = a;
922         const struct bwg_slave *bwg_b = b;
923         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
924         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
925                         (int64_t)bwg_a->bwg_left_remainder;
926         if (diff > 0)
927                 return 1;
928         else if (diff < 0)
929                 return -1;
930         else if (diff2 > 0)
931                 return 1;
932         else if (diff2 < 0)
933                 return -1;
934         else
935                 return 0;
936 }
937
938 static void
939 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
940                 struct bwg_slave *bwg_slave)
941 {
942         struct rte_eth_link link_status;
943
944         rte_eth_link_get_nowait(port_id, &link_status);
945         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
946         if (link_bwg == 0)
947                 return;
948         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
949         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
950         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
951 }
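/*
 * Worked example (illustrative): on a 10 Gb/s link, link_bwg starts as
 * 10000 * 1000000 / 8 = 1.25e9 bytes/s and is scaled by
 * (update_idx + 1) * REORDER_PERIOD_MS. For update_idx 0 that gives 1.25e10;
 * a slave that sent load = 1e6 bytes leaves
 * (1.25e10 - 1e9) / 1.25e10 = 0 with remainder 1.15e10, so in practice the
 * ordering between slaves is decided by the remainder term in bandwidth_cmp().
 */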
952
953 static void
954 bond_ethdev_update_tlb_slave_cb(void *arg)
955 {
956         struct bond_dev_private *internals = arg;
957         struct rte_eth_stats slave_stats;
958         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
959         uint8_t slave_count;
960         uint64_t tx_bytes;
961
962         uint8_t update_stats = 0;
963         uint8_t i, slave_id;
964
965         internals->slave_update_idx++;
966
967
968         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
969                 update_stats = 1;
970
971         for (i = 0; i < internals->active_slave_count; i++) {
972                 slave_id = internals->active_slaves[i];
973                 rte_eth_stats_get(slave_id, &slave_stats);
974                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
975                 bandwidth_left(slave_id, tx_bytes,
976                                 internals->slave_update_idx, &bwg_array[i]);
977                 bwg_array[i].slave = slave_id;
978
979                 if (update_stats) {
980                         tlb_last_obytets[slave_id] = slave_stats.obytes;
981                 }
982         }
983
984         if (update_stats == 1)
985                 internals->slave_update_idx = 0;
986
987         slave_count = i;
988         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
989         for (i = 0; i < slave_count; i++)
990                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
991
992         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
993                         (struct bond_dev_private *)internals);
994 }
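/*
 * Note: rte_eal_alarm_set() takes microseconds, so REORDER_PERIOD_MS * 1000
 * re-arms the callback above every 10 ms; each invocation re-sorts the slaves
 * by the bandwidth headroom computed in bandwidth_left().
 */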
995
996 static uint16_t
997 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
998 {
999         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1000         struct bond_dev_private *internals = bd_tx_q->dev_private;
1001
1002         struct rte_eth_dev *primary_port =
1003                         &rte_eth_devices[internals->primary_port];
1004         uint16_t num_tx_total = 0;
1005         uint16_t i, j;
1006
1007         uint16_t num_of_slaves = internals->active_slave_count;
1008         uint16_t slaves[RTE_MAX_ETHPORTS];
1009
1010         struct ether_hdr *ether_hdr;
1011         struct ether_addr primary_slave_addr;
1012         struct ether_addr active_slave_addr;
1013
1014         if (num_of_slaves < 1)
1015                 return num_tx_total;
1016
1017         memcpy(slaves, internals->tlb_slaves_order,
1018                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1019
1020
1021         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1022
1023         if (nb_pkts > 3) {
1024                 for (i = 0; i < 3; i++)
1025                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1026         }
1027
1028         for (i = 0; i < num_of_slaves; i++) {
1029                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1030                 for (j = num_tx_total; j < nb_pkts; j++) {
1031                         if (j + 3 < nb_pkts)
1032                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1033
1034                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1035                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1036                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1037 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1038                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1039 #endif
1040                 }
1041
1042                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1043                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1044
1045                 if (num_tx_total == nb_pkts)
1046                         break;
1047         }
1048
1049         return num_tx_total;
1050 }
1051
1052 void
1053 bond_tlb_disable(struct bond_dev_private *internals)
1054 {
1055         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1056 }
1057
1058 void
1059 bond_tlb_enable(struct bond_dev_private *internals)
1060 {
1061         bond_ethdev_update_tlb_slave_cb(internals);
1062 }
1063
1064 static uint16_t
1065 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1066 {
1067         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1068         struct bond_dev_private *internals = bd_tx_q->dev_private;
1069
1070         struct ether_hdr *eth_h;
1071         uint16_t ether_type, offset;
1072
1073         struct client_data *client_info;
1074
1075         /*
1076          * We create transmit buffers for every slave, plus one additional buffer
1077          * used for the TLB policy. In the worst case every packet is sent on one port.
1078          */
1079         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1080         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1081
1082         /*
1083          * We create separate transmit buffers for update packets as they won't
1084          * be counted in num_tx_total.
1085          */
1086         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1087         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1088
1089         struct rte_mbuf *upd_pkt;
1090         size_t pkt_size;
1091
1092         uint16_t num_send, num_not_send = 0;
1093         uint16_t num_tx_total = 0;
1094         uint16_t slave_idx;
1095
1096         int i, j;
1097
1098         /* Search tx buffer for ARP packets and forward them to alb */
1099         for (i = 0; i < nb_pkts; i++) {
1100                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1101                 ether_type = eth_h->ether_type;
1102                 offset = get_vlan_offset(eth_h, &ether_type);
1103
1104                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1105                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1106
1107                         /* Change src mac in eth header */
1108                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1109
1110                         /* Add packet to slave tx buffer */
1111                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1112                         slave_bufs_pkts[slave_idx]++;
1113                 } else {
1114                         /* If packet is not ARP, send it with TLB policy */
1115                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1116                                         bufs[i];
1117                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1118                 }
1119         }
1120
1121         /* Update connected client ARP tables */
1122         if (internals->mode6.ntt) {
1123                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1124                         client_info = &internals->mode6.client_table[i];
1125
1126                         if (client_info->in_use) {
1127                                 /* Allocate new packet to send ARP update on current slave */
1128                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1129                                 if (upd_pkt == NULL) {
1130                                         RTE_BOND_LOG(ERR,
1131                                                      "Failed to allocate ARP packet from pool");
1132                                         continue;
1133                                 }
1134                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1135                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1136                                 upd_pkt->data_len = pkt_size;
1137                                 upd_pkt->pkt_len = pkt_size;
1138
1139                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1140                                                 internals);
1141
1142                                 /* Add packet to update tx buffer */
1143                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1144                                 update_bufs_pkts[slave_idx]++;
1145                         }
1146                 }
1147                 internals->mode6.ntt = 0;
1148         }
1149
1150         /* Send ARP packets on proper slaves */
1151         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1152                 if (slave_bufs_pkts[i] > 0) {
1153                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1154                                         slave_bufs[i], slave_bufs_pkts[i]);
1155                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1156                                 bufs[nb_pkts - 1 - num_not_send - j] =
1157                                                 slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
1158                         }
1159
1160                         num_tx_total += num_send;
1161                         num_not_send += slave_bufs_pkts[i] - num_send;
1162
1163 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1164                         /* Print TX stats including update packets */
1165                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1166                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1167                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1168                         }
1169 #endif
1170                 }
1171         }
1172
1173         /* Send update packets on proper slaves */
1174         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1175                 if (update_bufs_pkts[i] > 0) {
1176                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1177                                         update_bufs_pkts[i]);
1178                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1179                                 rte_pktmbuf_free(update_bufs[i][j]);
1180                         }
1181 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1182                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1183                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1184                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1185                         }
1186 #endif
1187                 }
1188         }
1189
1190         /* Send non-ARP packets using tlb policy */
1191         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1192                 num_send = bond_ethdev_tx_burst_tlb(queue,
1193                                 slave_bufs[RTE_MAX_ETHPORTS],
1194                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1195
1196                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
1197                         bufs[nb_pkts - 1 - num_not_send - j] =
1198                                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
1199                 }
1200
1201                 num_tx_total += num_send;
1202         }
1203
1204         return num_tx_total;
1205 }
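/*
 * Illustrative sketch, not part of the driver: adaptive load balancing is
 * selected like any other bonding mode; the ARP rewriting above then runs
 * inside the normal tx burst path. The helper name is hypothetical.
 */
static __rte_unused int
example_enable_alb(uint16_t bonded_port_id)
{
        return rte_eth_bond_mode_set(bonded_port_id, BONDING_MODE_ALB);
}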
1206
1207 static uint16_t
1208 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1209                 uint16_t nb_bufs)
1210 {
1211         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1212         struct bond_dev_private *internals = bd_tx_q->dev_private;
1213
1214         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1215         uint16_t slave_count;
1216
1217         /* Array to sort mbufs for transmission on each slave into */
1218         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1219         /* Number of mbufs for transmission on each slave */
1220         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1221         /* Mapping array generated by hash function to map mbufs to slaves */
1222         uint16_t bufs_slave_port_idxs[nb_bufs];
1223
1224         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1225         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1226
1227         uint16_t i, j;
1228
1229         if (unlikely(nb_bufs == 0))
1230                 return 0;
1231
1232         /* Copy slave list to protect against slave up/down changes during tx
1233          * bursting */
1234         slave_count = internals->active_slave_count;
1235         if (unlikely(slave_count < 1))
1236                 return 0;
1237
1238         memcpy(slave_port_ids, internals->active_slaves,
1239                         sizeof(slave_port_ids[0]) * slave_count);
1240
1241         /*
1242          * Populate each slave's mbuf array with the packets to be sent on it,
1243          * selecting the output slave with a hash computed per the xmit policy
1244          */
1245         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1246                         bufs_slave_port_idxs);
1247
1248         for (i = 0; i < nb_bufs; i++) {
1249                 /* Populate slave mbuf arrays with mbufs for that slave. */
1250                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1251
1252                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1253         }
1254
1255         /* Send packet burst on each slave device */
1256         for (i = 0; i < slave_count; i++) {
1257                 if (slave_nb_bufs[i] == 0)
1258                         continue;
1259
1260                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1261                                 bd_tx_q->queue_id, slave_bufs[i],
1262                                 slave_nb_bufs[i]);
1263
1264                 total_tx_count += slave_tx_count;
1265
1266                 /* If tx burst fails move packets to end of bufs */
1267                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1268                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
1269                                         slave_tx_count;
1270                         total_tx_fail_count += slave_tx_fail_count[i];
1271
1272                         /*
1273                          * Shift bufs to beginning of array to allow reordering
1274                          * later
1275                          */
1276                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
1277                                 slave_bufs[i][j] =
1278                                         slave_bufs[i][slave_tx_count + j];
1279                         }
1280                 }
1281         }
1282
1283         /*
1284          * If there were tx burst failures we move the failed packets to the end
1285          * of bufs to preserve the expected PMD behaviour that all unsent packets
1286          * sit at the end of the input mbuf array
1287          */
1288         if (unlikely(total_tx_fail_count > 0)) {
1289                 int bufs_idx = nb_bufs - total_tx_fail_count;
1290
1291                 for (i = 0; i < slave_count; i++) {
1292                         if (slave_tx_fail_count[i] > 0) {
1293                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1294                                         bufs[bufs_idx++] = slave_bufs[i][j];
1295                         }
1296                 }
1297         }
1298
1299         return total_tx_count;
1300 }
1301
1302 static uint16_t
1303 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1304                 uint16_t nb_bufs)
1305 {
1306         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1307         struct bond_dev_private *internals = bd_tx_q->dev_private;
1308
1309         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1310         uint16_t slave_count;
1311
1312         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1313         uint16_t dist_slave_count;
1314
1315         /* 2-D array to sort mbufs for transmission on each slave into */
1316         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1317         /* Number of mbufs for transmission on each slave */
1318         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1319         /* Mapping array generated by hash function to map mbufs to slaves */
1320         uint16_t bufs_slave_port_idxs[nb_bufs];
1321
1322         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1323         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1324
1325         uint16_t i, j;
1326
1327         if (unlikely(nb_bufs == 0))
1328                 return 0;
1329
1330         /* Copy slave list to protect against slave up/down changes during tx
1331          * bursting */
1332         slave_count = internals->active_slave_count;
1333         if (unlikely(slave_count < 1))
1334                 return 0;
1335
1336         memcpy(slave_port_ids, internals->active_slaves,
1337                         sizeof(slave_port_ids[0]) * slave_count);
1338
1339         dist_slave_count = 0;
1340         for (i = 0; i < slave_count; i++) {
1341                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1342
1343                 if (ACTOR_STATE(port, DISTRIBUTING))
1344                         dist_slave_port_ids[dist_slave_count++] =
1345                                         slave_port_ids[i];
1346         }
1347
1348         if (likely(dist_slave_count > 0)) {
1349
1350                 /*
1351                  * Populate each slave's mbuf array with the packets to be sent
1352                  * on it, selecting the output slave with the xmit policy hash
1353                  */
1354                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1355                                 bufs_slave_port_idxs);
1356
1357                 for (i = 0; i < nb_bufs; i++) {
1358                         /*
1359                          * Populate slave mbuf arrays with mbufs for that
1360                          * slave
1361                          */
1362                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1363
1364                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1365                                         bufs[i];
1366                 }
1367
1368
1369                 /* Send packet burst on each slave device */
1370                 for (i = 0; i < dist_slave_count; i++) {
1371                         if (slave_nb_bufs[i] == 0)
1372                                 continue;
1373
1374                         slave_tx_count = rte_eth_tx_burst(
1375                                         dist_slave_port_ids[i],
1376                                         bd_tx_q->queue_id, slave_bufs[i],
1377                                         slave_nb_bufs[i]);
1378
1379                         total_tx_count += slave_tx_count;
1380
1381                         /* If tx burst fails move packets to end of bufs */
1382                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1383                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1384                                                 slave_tx_count;
1385                                 total_tx_fail_count += slave_tx_fail_count[i];
1386
1387                                 /*
1388                                  * Shift the unsent mbufs to the beginning
1389                                  * of the array to allow reordering later
1390                                  */
1391                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1392                                         slave_bufs[i][j] =
1393                                                 slave_bufs[i]
1394                                                         [slave_tx_count
1395                                                         + j];
1396                         }
1397                 }
1398
1399                 /*
1400                  * If there are tx burst failures we move the unsent packets
1401                  * to the end of bufs to preserve the expected PMD behaviour
1402                  * that all failed transmissions are at the end of the input
1403                  * mbuf array
1404                  */
1405                 if (unlikely(total_tx_fail_count > 0)) {
1406                         int bufs_idx = nb_bufs - total_tx_fail_count;
1406
1407                         for (i = 0; i < slave_count; i++) {
1408                                 if (slave_tx_fail_count[i] > 0) {
1409                                         for (j = 0;
1410                                                 j < slave_tx_fail_count[i];
1411                                                 j++) {
1412                                                 bufs[bufs_idx++] =
1413                                                         slave_bufs[i][j];
1414                                         }
1415                                 }
1416                         }
1417                 }
1418         }
1419
1420         /* Check for LACP control packets and send if available */
1421         for (i = 0; i < slave_count; i++) {
1422                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1423                 struct rte_mbuf *ctrl_pkt = NULL;
1424
1425                 if (likely(rte_ring_empty(port->tx_ring)))
1426                         continue;
1427
1428                 if (rte_ring_dequeue(port->tx_ring,
1429                                      (void **)&ctrl_pkt) != -ENOENT) {
1430                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1431                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1432                         /*
1433                          * re-enqueue LAG control plane packets to buffering
1434                          * ring if transmission fails so the packet isn't lost.
1435                          */
1436                         if (slave_tx_count != 1)
1437                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1438                 }
1439         }
1440
1441         return total_tx_count;
1442 }
1443
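/*
 * Transmit burst for broadcast mode: every packet is sent on every active
 * slave. Each mbuf's reference count is bumped once per additional slave so
 * that every slave's queue holds its own reference. On partial failure only
 * the result of the most successful slave is reported to the caller; the
 * surplus references held for the other slaves are freed here.
 */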
1444 static uint16_t
1445 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1446                 uint16_t nb_pkts)
1447 {
1448         struct bond_dev_private *internals;
1449         struct bond_tx_queue *bd_tx_q;
1450
1451         uint8_t tx_failed_flag = 0, num_of_slaves;
1452         uint16_t slaves[RTE_MAX_ETHPORTS];
1453
1454         uint16_t max_nb_of_tx_pkts = 0;
1455
1456         int slave_tx_total[RTE_MAX_ETHPORTS];
1457         int i, most_successful_tx_slave = -1;
1458
1459         bd_tx_q = (struct bond_tx_queue *)queue;
1460         internals = bd_tx_q->dev_private;
1461
1462         /* Copy slave list to protect against slave up/down changes during tx
1463          * bursting */
1464         num_of_slaves = internals->active_slave_count;
1465         memcpy(slaves, internals->active_slaves,
1466                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1467
1468         if (num_of_slaves < 1)
1469                 return 0;
1470
1471         /* Increment reference count on mbufs */
1472         for (i = 0; i < nb_pkts; i++)
1473                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1474
1475         /* Transmit burst on each active slave */
1476         for (i = 0; i < num_of_slaves; i++) {
1477                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1478                                         bufs, nb_pkts);
1479
1480                 if (unlikely(slave_tx_total[i] < nb_pkts))
1481                         tx_failed_flag = 1;
1482
1483                 /* record the value and slave index for the slave which transmits the
1484                  * maximum number of packets */
1485                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1486                         max_nb_of_tx_pkts = slave_tx_total[i];
1487                         most_successful_tx_slave = i;
1488                 }
1489         }
1490
1491         /* If any slave fails to transmit part of the burst, the calling
1492          * application is not aware of the extra mbuf references, so we must
1493          * free the failed copies on every slave except the most successful one
1494          */
1495         if (unlikely(tx_failed_flag))
1496                 for (i = 0; i < num_of_slaves; i++)
1497                         if (i != most_successful_tx_slave)
1498                                 while (slave_tx_total[i] < nb_pkts)
1499                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1500
1501         return max_nb_of_tx_pkts;
1502 }
1503
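/*
 * Record or reset the bonded device's link properties when a slave comes up.
 * In mode 4 the first slave's speed/duplex/autoneg become the reference that
 * every subsequent slave must match; in all other modes the bonded link
 * simply advertises the autonegotiated full duplex defaults.
 */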
1504 void
1505 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1506 {
1507         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1508
1509         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1510                 /**
1511                  * In mode 4, save the link properties of the first slave;
1512                  * all subsequent slaves must match these properties
1513                  */
1514                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1515
1516                 bond_link->link_autoneg = slave_link->link_autoneg;
1517                 bond_link->link_duplex = slave_link->link_duplex;
1518                 bond_link->link_speed = slave_link->link_speed;
1519         } else {
1520                 /**
1521                  * In any other mode the link properties are set to default
1522                  * values of AUTONEG/DUPLEX
1523                  */
1524                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1525                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1526         }
1527 }
1528
1529 int
1530 link_properties_valid(struct rte_eth_dev *ethdev,
1531                 struct rte_eth_link *slave_link)
1532 {
1533         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1534
1535         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1536                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1537
1538                 if (bond_link->link_duplex != slave_link->link_duplex ||
1539                         bond_link->link_autoneg != slave_link->link_autoneg ||
1540                         bond_link->link_speed != slave_link->link_speed)
1541                         return -1;
1542         }
1543
1544         return 0;
1545 }
1546
1547 int
1548 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1549 {
1550         struct ether_addr *mac_addr;
1551
1552         if (eth_dev == NULL) {
1553                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1554                 return -1;
1555         }
1556
1557         if (dst_mac_addr == NULL) {
1558                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1559                 return -1;
1560         }
1561
1562         mac_addr = eth_dev->data->mac_addrs;
1563
1564         ether_addr_copy(mac_addr, dst_mac_addr);
1565         return 0;
1566 }
1567
1568 int
1569 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1570 {
1571         struct ether_addr *mac_addr;
1572
1573         if (eth_dev == NULL) {
1574                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1575                 return -1;
1576         }
1577
1578         if (new_mac_addr == NULL) {
1579                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1580                 return -1;
1581         }
1582
1583         mac_addr = eth_dev->data->mac_addrs;
1584
1585         /* If the new MAC is different from the current MAC then update */
1586         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1587                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1588
1589         return 0;
1590 }
1591
1592 static const struct ether_addr null_mac_addr;
1593
1594 /*
1595  * Add the bonded device's secondary MAC addresses to the slave (with rollback on failure)
1596  */
1597 int
1598 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1599                 uint16_t slave_port_id)
1600 {
1601         int i, ret;
1602         struct ether_addr *mac_addr;
1603
1604         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1605                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1606                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1607                         break;
1608
1609                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1610                 if (ret < 0) {
1611                         /* rollback */
1612                         for (i--; i > 0; i--)
1613                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1614                                         &bonded_eth_dev->data->mac_addrs[i]);
1615                         return ret;
1616                 }
1617         }
1618
1619         return 0;
1620 }
1621
1622 /*
1623  * Remove additional MAC addresses from the slave
1624  */
1625 int
1626 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1627                 uint16_t slave_port_id)
1628 {
1629         int i, rc, ret;
1630         struct ether_addr *mac_addr;
1631
1632         rc = 0;
1633         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1634                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1635                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1636                         break;
1637
1638                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1639                 /* save only the first error */
1640                 if (ret < 0 && rc == 0)
1641                         rc = ret;
1642         }
1643
1644         return rc;
1645 }
1646
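/*
 * Push the appropriate MAC address to every slave for the current mode: in
 * round robin, balance and broadcast modes every slave carries the bonded
 * MAC; in mode 4 the update is delegated to the 802.3ad logic; in the
 * primary-based modes (active backup, TLB, ALB) only the current primary
 * carries the bonded MAC while the other slaves keep their persisted MACs.
 */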
1647 int
1648 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1649 {
1650         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1651         int i;
1652
1653         /* Update slave devices MAC addresses */
1654         if (internals->slave_count < 1)
1655                 return -1;
1656
1657         switch (internals->mode) {
1658         case BONDING_MODE_ROUND_ROBIN:
1659         case BONDING_MODE_BALANCE:
1660         case BONDING_MODE_BROADCAST:
1661                 for (i = 0; i < internals->slave_count; i++) {
1662                         if (rte_eth_dev_default_mac_addr_set(
1663                                         internals->slaves[i].port_id,
1664                                         bonded_eth_dev->data->mac_addrs)) {
1665                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1666                                                 internals->slaves[i].port_id);
1667                                 return -1;
1668                         }
1669                 }
1670                 break;
1671         case BONDING_MODE_8023AD:
1672                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1673                 break;
1674         case BONDING_MODE_ACTIVE_BACKUP:
1675         case BONDING_MODE_TLB:
1676         case BONDING_MODE_ALB:
1677         default:
1678                 for (i = 0; i < internals->slave_count; i++) {
1679                         if (internals->slaves[i].port_id ==
1680                                         internals->current_primary_port) {
1681                                 if (rte_eth_dev_default_mac_addr_set(
1682                                                 internals->current_primary_port,
1683                                                 bonded_eth_dev->data->mac_addrs)) {
1684                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1685                                                         internals->current_primary_port);
1686                                         return -1;
1687                                 }
1688                         } else {
1689                                 if (rte_eth_dev_default_mac_addr_set(
1690                                                 internals->slaves[i].port_id,
1691                                                 &internals->slaves[i].persisted_mac_addr)) {
1692                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1693                                                         internals->slaves[i].port_id);
1694                                         return -1;
1695                                 }
1696                         }
1697                 }
1698         }
1699
1700         return 0;
1701 }
1702
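/*
 * Select the rx/tx burst handlers for the requested bonding mode; mode 4
 * (802.3ad) and ALB additionally require their mode-specific enable steps
 * to succeed before the handlers are installed.
 *
 * For illustration (a sketch, not taken from this file): an application
 * would normally reach this through the public API, e.g.
 *
 *     if (rte_eth_bond_mode_set(bond_port_id, BONDING_MODE_8023AD) != 0)
 *             rte_exit(EXIT_FAILURE, "cannot set bonding mode\n");
 *
 * where bond_port_id is the port id returned by rte_eth_bond_create().
 */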
1703 int
1704 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1705 {
1706         struct bond_dev_private *internals;
1707
1708         internals = eth_dev->data->dev_private;
1709
1710         switch (mode) {
1711         case BONDING_MODE_ROUND_ROBIN:
1712                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1713                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1714                 break;
1715         case BONDING_MODE_ACTIVE_BACKUP:
1716                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1717                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1718                 break;
1719         case BONDING_MODE_BALANCE:
1720                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1721                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1722                 break;
1723         case BONDING_MODE_BROADCAST:
1724                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1725                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1726                 break;
1727         case BONDING_MODE_8023AD:
1728                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1729                         return -1;
1730
1731                 if (internals->mode4.dedicated_queues.enabled == 0) {
1732                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1733                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1734                         RTE_BOND_LOG(WARNING,
1735                                 "Using mode 4, TX and RX bursts must be "
1736                                 "invoked at least once every 100ms.");
1737                 } else {
1738                         /* Use flow director's optimization */
1739                         eth_dev->rx_pkt_burst =
1740                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1741                         eth_dev->tx_pkt_burst =
1742                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1743                 }
1744                 break;
1745         case BONDING_MODE_TLB:
1746                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1747                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1748                 break;
1749         case BONDING_MODE_ALB:
1750                 if (bond_mode_alb_enable(eth_dev) != 0)
1751                         return -1;
1752
1753                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1754                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1755                 break;
1756         default:
1757                 return -1;
1758         }
1759
1760         internals->mode = mode;
1761
1762         return 0;
1763 }
1764
1765
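/*
 * Create the per-slave mempool used for LACP control traffic and, when
 * dedicated control queues are enabled, set up the extra rx/tx queue pair
 * that carries slow (LACP) frames separately from the data path.
 */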
1766 static int
1767 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1768                 struct rte_eth_dev *slave_eth_dev)
1769 {
1770         int errval = 0;
1771         struct bond_dev_private *internals = (struct bond_dev_private *)
1772                 bonded_eth_dev->data->dev_private;
1773         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1774
1775         if (port->slow_pool == NULL) {
1776                 char mem_name[256];
1777                 int slave_id = slave_eth_dev->data->port_id;
1778
1779                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1780                                 slave_id);
1781                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1782                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1783                         slave_eth_dev->data->numa_node);
1784
1785                 /* Any memory allocation failure in initialization is critical
1786                  * because resources can't be freed, so reinitialization is impossible. */
1787                 if (port->slow_pool == NULL) {
1788                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1789                                 slave_id, mem_name, rte_strerror(rte_errno));
1790                 }
1791         }
1792
1793         if (internals->mode4.dedicated_queues.enabled == 1) {
1794                 /* Configure slow Rx queue */
1795
1796                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1797                                 internals->mode4.dedicated_queues.rx_qid, 128,
1798                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1799                                 NULL, port->slow_pool);
1800                 if (errval != 0) {
1801                         RTE_BOND_LOG(ERR,
1802                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1803                                         slave_eth_dev->data->port_id,
1804                                         internals->mode4.dedicated_queues.rx_qid,
1805                                         errval);
1806                         return errval;
1807                 }
1808
1809                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1810                                 internals->mode4.dedicated_queues.tx_qid, 512,
1811                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1812                                 NULL);
1813                 if (errval != 0) {
1814                         RTE_BOND_LOG(ERR,
1815                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1816                                 slave_eth_dev->data->port_id,
1817                                 internals->mode4.dedicated_queues.tx_qid,
1818                                 errval);
1819                         return errval;
1820                 }
1821         }
1822         return 0;
1823 }
1824
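/*
 * (Re)configure a slave to mirror the bonded device's configuration: stop
 * it, propagate the RSS, VLAN filter and MTU settings, set up one rx/tx
 * queue pair per bonded queue (plus the dedicated LACP queues in mode 4
 * when enabled), restart it, and resynchronize the RSS RETA and initial
 * link status.
 */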
1825 int
1826 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1827                 struct rte_eth_dev *slave_eth_dev)
1828 {
1829         struct bond_rx_queue *bd_rx_q;
1830         struct bond_tx_queue *bd_tx_q;
1831         uint16_t nb_rx_queues;
1832         uint16_t nb_tx_queues;
1833
1834         int errval;
1835         uint16_t q_id;
1836         struct rte_flow_error flow_error;
1837
1838         struct bond_dev_private *internals = (struct bond_dev_private *)
1839                 bonded_eth_dev->data->dev_private;
1840
1841         /* Stop slave */
1842         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1843
1844         /* Enable interrupts on slave device if supported */
1845         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1846                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1847
1848         /* If RSS is enabled for bonding, try to enable it for slaves  */
1849         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1850                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1851                                 != 0) {
1852                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1853                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1854                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1855                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1856                 } else {
1857                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1858                 }
1859
1860                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1861                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1862                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1863                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1864         }
1865
1866         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1867                         DEV_RX_OFFLOAD_VLAN_FILTER)
1868                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1869                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1870         else
1871                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1872                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1873
1874         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1875         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1876
1877         if (internals->mode == BONDING_MODE_8023AD) {
1878                 if (internals->mode4.dedicated_queues.enabled == 1) {
1879                         nb_rx_queues++;
1880                         nb_tx_queues++;
1881                 }
1882         }
1883
1884         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1885                                      bonded_eth_dev->data->mtu);
1886         if (errval != 0 && errval != -ENOTSUP) {
1887                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1888                                 slave_eth_dev->data->port_id, errval);
1889                 return errval;
1890         }
1891
1892         /* Configure device */
1893         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1894                         nb_rx_queues, nb_tx_queues,
1895                         &(slave_eth_dev->data->dev_conf));
1896         if (errval != 0) {
1897                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1898                                 slave_eth_dev->data->port_id, errval);
1899                 return errval;
1900         }
1901
1902         /* Setup Rx Queues */
1903         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1904                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1905
1906                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1907                                 bd_rx_q->nb_rx_desc,
1908                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1909                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1910                 if (errval != 0) {
1911                         RTE_BOND_LOG(ERR,
1912                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1913                                         slave_eth_dev->data->port_id, q_id, errval);
1914                         return errval;
1915                 }
1916         }
1917
1918         /* Setup Tx Queues */
1919         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1920                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1921
1922                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1923                                 bd_tx_q->nb_tx_desc,
1924                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1925                                 &bd_tx_q->tx_conf);
1926                 if (errval != 0) {
1927                         RTE_BOND_LOG(ERR,
1928                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1929                                 slave_eth_dev->data->port_id, q_id, errval);
1930                         return errval;
1931                 }
1932         }
1933
1934         if (internals->mode == BONDING_MODE_8023AD &&
1935                         internals->mode4.dedicated_queues.enabled == 1) {
1936                 errval = slave_configure_slow_queue(bonded_eth_dev,
1937                                 slave_eth_dev);
1938                 if (errval != 0)
1939                         return errval;
1940
1941                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1942                                 slave_eth_dev->data->port_id) != 0) {
1943                         RTE_BOND_LOG(ERR, "8023ad flow verify failed: port=%d",
1944                                 slave_eth_dev->data->port_id);
1945                         return -1;
1946                 }
1947
1948                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1949                         rte_flow_destroy(slave_eth_dev->data->port_id,
1950                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1951                                         &flow_error);
1952
1953                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1954                                 slave_eth_dev->data->port_id);
1955         }
1956
1957         /* Start device */
1958         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1959         if (errval != 0) {
1960                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1961                                 slave_eth_dev->data->port_id, errval);
1962                 return -1;
1963         }
1964
1965         /* If RSS is enabled for bonding, synchronize RETA */
1966         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1967                 int i;
1968                 struct bond_dev_private *internals;
1969
1970                 internals = bonded_eth_dev->data->dev_private;
1971
1972                 for (i = 0; i < internals->slave_count; i++) {
1973                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1974                                 errval = rte_eth_dev_rss_reta_update(
1975                                                 slave_eth_dev->data->port_id,
1976                                                 &internals->reta_conf[0],
1977                                                 internals->slaves[i].reta_size);
1978                                 if (errval != 0) {
1979                                         RTE_BOND_LOG(WARNING,
1980                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1981                                                      " RSS Configuration for bonding may be inconsistent.",
1982                                                      slave_eth_dev->data->port_id, errval);
1983                                 }
1984                                 break;
1985                         }
1986                 }
1987         }
1988
1989         /* If lsc interrupt is set, check initial slave's link status */
1990         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1991                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1992                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1993                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1994                         NULL);
1995         }
1996
1997         return 0;
1998 }
1999
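/*
 * Remove a slave from the bonded device's slave table, compacting both the
 * slave array and each rte_flow's per-slave flow array, then reset the
 * slave port so that it must be fully reconfigured before reuse.
 */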
2000 void
2001 slave_remove(struct bond_dev_private *internals,
2002                 struct rte_eth_dev *slave_eth_dev)
2003 {
2004         uint8_t i;
2005
2006         for (i = 0; i < internals->slave_count; i++)
2007                 if (internals->slaves[i].port_id ==
2008                                 slave_eth_dev->data->port_id)
2009                         break;
2010
2011         if (i < (internals->slave_count - 1)) {
2012                 struct rte_flow *flow;
2013
2014                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
2015                                 sizeof(internals->slaves[0]) *
2016                                 (internals->slave_count - i - 1));
2017                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
2018                         memmove(&flow->flows[i], &flow->flows[i + 1],
2019                                 sizeof(flow->flows[0]) *
2020                                 (internals->slave_count - i - 1));
2021                         flow->flows[internals->slave_count - 1] = NULL;
2022                 }
2023         }
2024
2025         internals->slave_count--;
2026
2027         /* force reconfiguration of slave interfaces */
2028         _rte_eth_dev_reset(slave_eth_dev);
2029 }
2030
2031 static void
2032 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
2033
2034 void
2035 slave_add(struct bond_dev_private *internals,
2036                 struct rte_eth_dev *slave_eth_dev)
2037 {
2038         struct bond_slave_details *slave_details =
2039                         &internals->slaves[internals->slave_count];
2040
2041         slave_details->port_id = slave_eth_dev->data->port_id;
2042         slave_details->last_link_status = 0;
2043
2044         /* Mark slave devices that don't support interrupts so we can
2045          * compensate when we start the bond
2046          */
2047         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2048                 slave_details->link_status_poll_enabled = 1;
2049         }
2050
2051         slave_details->link_status_wait_to_complete = 0;
2052         /* Save the slave's MAC so it can be restored when it leaves the bond */
2053         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2054                         sizeof(struct ether_addr));
2055 }
2056
2057 void
2058 bond_ethdev_primary_set(struct bond_dev_private *internals,
2059                 uint16_t slave_port_id)
2060 {
2061         int i;
2062
2063         if (internals->active_slave_count < 1)
2064                 internals->current_primary_port = slave_port_id;
2065         else
2066                 /* Search bonded device slave ports for new proposed primary port */
2067                 for (i = 0; i < internals->active_slave_count; i++) {
2068                         if (internals->active_slaves[i] == slave_port_id)
2069                                 internals->current_primary_port = slave_port_id;
2070                 }
2071 }
2072
2073 static void
2074 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2075
2076 static int
2077 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2078 {
2079         struct bond_dev_private *internals;
2080         int i;
2081
2082         /* slave eth dev will be started by bonded device */
2083         if (check_for_bonded_ethdev(eth_dev)) {
2084                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2085                                 eth_dev->data->port_id);
2086                 return -1;
2087         }
2088
2089         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2090         eth_dev->data->dev_started = 1;
2091
2092         internals = eth_dev->data->dev_private;
2093
2094         if (internals->slave_count == 0) {
2095                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2096                 goto out_err;
2097         }
2098
2099         if (internals->user_defined_mac == 0) {
2100                 struct ether_addr *new_mac_addr = NULL;
2101
2102                 for (i = 0; i < internals->slave_count; i++)
2103                         if (internals->slaves[i].port_id == internals->primary_port)
2104                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2105
2106                 if (new_mac_addr == NULL)
2107                         goto out_err;
2108
2109                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2110                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2111                                         eth_dev->data->port_id);
2112                         goto out_err;
2113                 }
2114         }
2115
2116         /* If bonded device is configured in promiscuous mode then re-apply config */
2117         if (internals->promiscuous_en)
2118                 bond_ethdev_promiscuous_enable(eth_dev);
2119
2120         if (internals->mode == BONDING_MODE_8023AD) {
2121                 if (internals->mode4.dedicated_queues.enabled == 1) {
2122                         internals->mode4.dedicated_queues.rx_qid =
2123                                         eth_dev->data->nb_rx_queues;
2124                         internals->mode4.dedicated_queues.tx_qid =
2125                                         eth_dev->data->nb_tx_queues;
2126                 }
2127         }
2128
2129
2130         /* Reconfigure each slave device if starting bonded device */
2131         for (i = 0; i < internals->slave_count; i++) {
2132                 struct rte_eth_dev *slave_ethdev =
2133                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2134                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2135                         RTE_BOND_LOG(ERR,
2136                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2137                                 eth_dev->data->port_id,
2138                                 internals->slaves[i].port_id);
2139                         goto out_err;
2140                 }
2141                 /* We will need to poll for link status if any slave doesn't
2142                  * support interrupts
2143                  */
2144                 if (internals->slaves[i].link_status_poll_enabled)
2145                         internals->link_status_polling_enabled = 1;
2146         }
2147
2148         /* start polling if needed */
2149         if (internals->link_status_polling_enabled) {
2150                 rte_eal_alarm_set(
2151                         internals->link_status_polling_interval_ms * 1000,
2152                         bond_ethdev_slave_link_status_change_monitor,
2153                         (void *)&rte_eth_devices[internals->port_id]);
2154         }
2155
2156         /* Update all slave devices' MAC addresses */
2157         if (mac_address_slaves_update(eth_dev) != 0)
2158                 goto out_err;
2159
2160         if (internals->user_defined_primary_port)
2161                 bond_ethdev_primary_set(internals, internals->primary_port);
2162
2163         if (internals->mode == BONDING_MODE_8023AD)
2164                 bond_mode_8023ad_start(eth_dev);
2165
2166         if (internals->mode == BONDING_MODE_TLB ||
2167                         internals->mode == BONDING_MODE_ALB)
2168                 bond_tlb_enable(internals);
2169
2170         return 0;
2171
2172 out_err:
2173         eth_dev->data->dev_started = 0;
2174         return -1;
2175 }
2176
2177 static void
2178 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2179 {
2180         uint8_t i;
2181
2182         if (dev->data->rx_queues != NULL) {
2183                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2184                         rte_free(dev->data->rx_queues[i]);
2185                         dev->data->rx_queues[i] = NULL;
2186                 }
2187                 dev->data->nb_rx_queues = 0;
2188         }
2189
2190         if (dev->data->tx_queues != NULL) {
2191                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2192                         rte_free(dev->data->tx_queues[i]);
2193                         dev->data->tx_queues[i] = NULL;
2194                 }
2195                 dev->data->nb_tx_queues = 0;
2196         }
2197 }
2198
2199 void
2200 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2201 {
2202         struct bond_dev_private *internals = eth_dev->data->dev_private;
2203         uint8_t i;
2204
2205         if (internals->mode == BONDING_MODE_8023AD) {
2206                 struct port *port;
2207                 void *pkt = NULL;
2208
2209                 bond_mode_8023ad_stop(eth_dev);
2210
2211                 /* Discard all messages to/from mode 4 state machines */
2212                 for (i = 0; i < internals->active_slave_count; i++) {
2213                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2214
2215                         RTE_ASSERT(port->rx_ring != NULL);
2216                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2217                                 rte_pktmbuf_free(pkt);
2218
2219                         RTE_ASSERT(port->tx_ring != NULL);
2220                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2221                                 rte_pktmbuf_free(pkt);
2222                 }
2223         }
2224
2225         if (internals->mode == BONDING_MODE_TLB ||
2226                         internals->mode == BONDING_MODE_ALB) {
2227                 bond_tlb_disable(internals);
2228                 for (i = 0; i < internals->active_slave_count; i++)
2229                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2230         }
2231
2232         internals->link_status_polling_enabled = 0;
2233         for (i = 0; i < internals->slave_count; i++)
2234                 internals->slaves[i].last_link_status = 0;
2235
2236         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2237         eth_dev->data->dev_started = 0;
2238 }
2239
2240 void
2241 bond_ethdev_close(struct rte_eth_dev *dev)
2242 {
2243         struct bond_dev_private *internals = dev->data->dev_private;
2244         uint8_t bond_port_id = internals->port_id;
2245         int skipped = 0;
2246         struct rte_flow_error ferror;
2247
2248         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2249         while (internals->slave_count != skipped) {
2250                 uint16_t port_id = internals->slaves[skipped].port_id;
2251
2252                 rte_eth_dev_stop(port_id);
2253
2254                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2255                         RTE_BOND_LOG(ERR,
2256                                      "Failed to remove port %d from bonded device %s",
2257                                      port_id, dev->device->name);
2258                         skipped++;
2259                 }
2260         }
2261         bond_flow_ops.flush(dev, &ferror);
2262         bond_ethdev_free_queues(dev);
2263         rte_bitmap_reset(internals->vlan_filter_bmp);
2264 }
2265
2266 /* forward declaration */
2267 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2268
2269 static void
2270 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2271 {
2272         struct bond_dev_private *internals = dev->data->dev_private;
2273
2274         uint16_t max_nb_rx_queues = UINT16_MAX;
2275         uint16_t max_nb_tx_queues = UINT16_MAX;
2276
2277         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2278
2279         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2280                         internals->candidate_max_rx_pktlen :
2281                         ETHER_MAX_JUMBO_FRAME_LEN;
2282
2283         /* The maximum number of tx/rx queues that the bonded device can
2284          * support is the minimum across the bonded slaves, as every slave
2285          * must be capable of supporting the same number of tx/rx queues.
2286          */
2287         if (internals->slave_count > 0) {
2288                 struct rte_eth_dev_info slave_info;
2289                 uint8_t idx;
2290
2291                 for (idx = 0; idx < internals->slave_count; idx++) {
2292                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2293                                         &slave_info);
2294
2295                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2296                                 max_nb_rx_queues = slave_info.max_rx_queues;
2297
2298                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2299                                 max_nb_tx_queues = slave_info.max_tx_queues;
2300                 }
2301         }
2302
2303         dev_info->max_rx_queues = max_nb_rx_queues;
2304         dev_info->max_tx_queues = max_nb_tx_queues;
2305
2306         /**
2307          * If dedicated hw queues are enabled for the link bonding device
2308          * in LACP mode, reduce the maximum number of data path queues by 1.
2309          */
2310         if (internals->mode == BONDING_MODE_8023AD &&
2311                 internals->mode4.dedicated_queues.enabled == 1) {
2312                 dev_info->max_rx_queues--;
2313                 dev_info->max_tx_queues--;
2314         }
2315
2316         dev_info->min_rx_bufsize = 0;
2317
2318         dev_info->rx_offload_capa = internals->rx_offload_capa;
2319         dev_info->tx_offload_capa = internals->tx_offload_capa;
2320         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2321         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2322         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2323
2324         dev_info->reta_size = internals->reta_size;
2325 }
2326
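/*
 * Record the VLAN filter in the bonded device's bitmap, so that it can be
 * re-applied to slaves added later, and propagate the setting to all
 * current slaves.
 */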
2327 static int
2328 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2329 {
2330         int res;
2331         uint16_t i;
2332         struct bond_dev_private *internals = dev->data->dev_private;
2333
2334         /* don't do this while a slave is being added */
2335         rte_spinlock_lock(&internals->lock);
2336
2337         if (on)
2338                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2339         else
2340                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2341
2342         for (i = 0; i < internals->slave_count; i++) {
2343                 uint16_t port_id = internals->slaves[i].port_id;
2344
2345                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2346                 if (res == -ENOTSUP)
2347                         RTE_BOND_LOG(WARNING,
2348                                      "Setting VLAN filter on slave port %u not supported.",
2349                                      port_id);
2350         }
2351
2352         rte_spinlock_unlock(&internals->lock);
2353         return 0;
2354 }
2355
2356 static int
2357 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2358                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2359                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2360 {
2361         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2362                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2363                                         0, dev->data->numa_node);
2364         if (bd_rx_q == NULL)
2365                 return -1;
2366
2367         bd_rx_q->queue_id = rx_queue_id;
2368         bd_rx_q->dev_private = dev->data->dev_private;
2369
2370         bd_rx_q->nb_rx_desc = nb_rx_desc;
2371
2372         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2373         bd_rx_q->mb_pool = mb_pool;
2374
2375         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2376
2377         return 0;
2378 }
2379
2380 static int
2381 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2382                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2383                 const struct rte_eth_txconf *tx_conf)
2384 {
2385         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2386                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2387                                         0, dev->data->numa_node);
2388
2389         if (bd_tx_q == NULL)
2390                 return -1;
2391
2392         bd_tx_q->queue_id = tx_queue_id;
2393         bd_tx_q->dev_private = dev->data->dev_private;
2394
2395         bd_tx_q->nb_tx_desc = nb_tx_desc;
2396         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2397
2398         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2399
2400         return 0;
2401 }
2402
2403 static void
2404 bond_ethdev_rx_queue_release(void *queue)
2405 {
2406         if (queue == NULL)
2407                 return;
2408
2409         rte_free(queue);
2410 }
2411
2412 static void
2413 bond_ethdev_tx_queue_release(void *queue)
2414 {
2415         if (queue == NULL)
2416                 return;
2417
2418         rte_free(queue);
2419 }
2420
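/*
 * Alarm callback that polls link status for slaves whose PMDs do not
 * support link status change interrupts; it re-arms itself for as long as
 * the bonded device is started and at least one slave requires polling.
 */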
2421 static void
2422 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2423 {
2424         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2425         struct bond_dev_private *internals;
2426
2427         /* Default value for polling slave found is true as we don't want to
2428          * disable the polling thread if we cannot get the lock */
2429         int i, polling_slave_found = 1;
2430
2431         if (cb_arg == NULL)
2432                 return;
2433
2434         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2435         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2436
2437         if (!bonded_ethdev->data->dev_started ||
2438                 !internals->link_status_polling_enabled)
2439                 return;
2440
2441         /* If the device is currently being configured then don't check the
2442          * slaves' link status; wait until the next period */
2443         if (rte_spinlock_trylock(&internals->lock)) {
2444                 if (internals->slave_count > 0)
2445                         polling_slave_found = 0;
2446
2447                 for (i = 0; i < internals->slave_count; i++) {
2448                         if (!internals->slaves[i].link_status_poll_enabled)
2449                                 continue;
2450
2451                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2452                         polling_slave_found = 1;
2453
2454                         /* Update slave link status */
2455                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2456                                         internals->slaves[i].link_status_wait_to_complete);
2457
2458                         /* if link status has changed since last checked then call lsc
2459                          * event callback */
2460                         if (slave_ethdev->data->dev_link.link_status !=
2461                                         internals->slaves[i].last_link_status) {
2462                                 internals->slaves[i].last_link_status =
2463                                                 slave_ethdev->data->dev_link.link_status;
2464
2465                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2466                                                 RTE_ETH_EVENT_INTR_LSC,
2467                                                 &bonded_ethdev->data->port_id,
2468                                                 NULL);
2469                         }
2470                 }
2471                 rte_spinlock_unlock(&internals->lock);
2472         }
2473
2474         if (polling_slave_found)
2475                 /* Set alarm to continue monitoring link status of slave ethdevs */
2476                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2477                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2478 }
2479
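/*
 * Aggregate link status and speed for the bonded device: down if stopped or
 * if there are no active slaves; otherwise the reported speed is the minimum
 * slave speed in broadcast mode, the primary slave's speed in active backup,
 * and the sum of the active slaves' speeds in the load-sharing modes.
 */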
2480 static int
2481 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2482 {
2483         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2484
2485         struct bond_dev_private *bond_ctx;
2486         struct rte_eth_link slave_link;
2487
2488         uint32_t idx;
2489
2490         bond_ctx = ethdev->data->dev_private;
2491
2492         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2493
2494         if (ethdev->data->dev_started == 0 ||
2495                         bond_ctx->active_slave_count == 0) {
2496                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2497                 return 0;
2498         }
2499
2500         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2501
2502         if (wait_to_complete)
2503                 link_update = rte_eth_link_get;
2504         else
2505                 link_update = rte_eth_link_get_nowait;
2506
2507         switch (bond_ctx->mode) {
2508         case BONDING_MODE_BROADCAST:
2509                 /**
2510                  * Setting link speed to UINT32_MAX to ensure we pick up the
2511                  * value of the first active slave
2512                  */
2513                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2514
2515                 /**
2516                  * The bonded link speed is the minimum of all slave link
2517                  * speeds, as packet loss will occur on a slave if transmission
2518                  * is attempted at a rate greater than its link speed
2519                  */
2520                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2521                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2522
2523                         if (slave_link.link_speed <
2524                                         ethdev->data->dev_link.link_speed)
2525                                 ethdev->data->dev_link.link_speed =
2526                                                 slave_link.link_speed;
2527                 }
2528                 break;
2529         case BONDING_MODE_ACTIVE_BACKUP:
2530                 /* Current primary slave */
2531                 link_update(bond_ctx->current_primary_port, &slave_link);
2532
2533                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2534                 break;
2535         case BONDING_MODE_8023AD:
2536                 ethdev->data->dev_link.link_autoneg =
2537                                 bond_ctx->mode4.slave_link.link_autoneg;
2538                 ethdev->data->dev_link.link_duplex =
2539                                 bond_ctx->mode4.slave_link.link_duplex;
2540                 /* fall through to update link speed */
2541         case BONDING_MODE_ROUND_ROBIN:
2542         case BONDING_MODE_BALANCE:
2543         case BONDING_MODE_TLB:
2544         case BONDING_MODE_ALB:
2545         default:
2546                 /**
2547                  * In these modes the maximum theoretical link speed is the
2548                  * sum of all the slaves' link speeds
2549                  */
2550                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2551
2552                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2553                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2554
2555                         ethdev->data->dev_link.link_speed +=
2556                                         slave_link.link_speed;
2557                 }
2558         }
2559
2560
2561         return 0;
2562 }
2563
2564
2565 static int
2566 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2567 {
2568         struct bond_dev_private *internals = dev->data->dev_private;
2569         struct rte_eth_stats slave_stats;
2570         int i, j;
2571
2572         for (i = 0; i < internals->slave_count; i++) {
2573                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2574
2575                 stats->ipackets += slave_stats.ipackets;
2576                 stats->opackets += slave_stats.opackets;
2577                 stats->ibytes += slave_stats.ibytes;
2578                 stats->obytes += slave_stats.obytes;
2579                 stats->imissed += slave_stats.imissed;
2580                 stats->ierrors += slave_stats.ierrors;
2581                 stats->oerrors += slave_stats.oerrors;
2582                 stats->rx_nombuf += slave_stats.rx_nombuf;
2583
2584                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2585                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2586                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2587                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2588                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2589                         stats->q_errors[j] += slave_stats.q_errors[j];
2590                 }
2591
2592         }
2593
2594         return 0;
2595 }
2596
2597 static void
2598 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2599 {
2600         struct bond_dev_private *internals = dev->data->dev_private;
2601         int i;
2602
2603         for (i = 0; i < internals->slave_count; i++)
2604                 rte_eth_stats_reset(internals->slaves[i].port_id);
2605 }
2606
2607 static void
2608 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2609 {
2610         struct bond_dev_private *internals = eth_dev->data->dev_private;
2611         int i;
2612
2613         internals->promiscuous_en = 1;
2614
2615         switch (internals->mode) {
2616         /* Promiscuous mode is propagated to all slaves */
2617         case BONDING_MODE_ROUND_ROBIN:
2618         case BONDING_MODE_BALANCE:
2619         case BONDING_MODE_BROADCAST:
2620                 for (i = 0; i < internals->slave_count; i++)
2621                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2622                 break;
2623         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2624         case BONDING_MODE_8023AD:
2625                 break;
2626         /* Promiscuous mode is propagated only to primary slave */
2627         case BONDING_MODE_ACTIVE_BACKUP:
2628         case BONDING_MODE_TLB:
2629         case BONDING_MODE_ALB:
2630         default:
2631                 rte_eth_promiscuous_enable(internals->current_primary_port);
2632         }
2633 }
2634
2635 static void
2636 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2637 {
2638         struct bond_dev_private *internals = dev->data->dev_private;
2639         int i;
2640
2641         internals->promiscuous_en = 0;
2642
2643         switch (internals->mode) {
2644         /* Promiscuous mode is propagated to all slaves */
2645         case BONDING_MODE_ROUND_ROBIN:
2646         case BONDING_MODE_BALANCE:
2647         case BONDING_MODE_BROADCAST:
2648                 for (i = 0; i < internals->slave_count; i++)
2649                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2650                 break;
2651         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2652         case BONDING_MODE_8023AD:
2653                 break;
2654         /* Promiscuous mode is propagated only to primary slave */
2655         case BONDING_MODE_ACTIVE_BACKUP:
2656         case BONDING_MODE_TLB:
2657         case BONDING_MODE_ALB:
2658         default:
2659                 rte_eth_promiscuous_disable(internals->current_primary_port);
2660         }
2661 }
2662
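/*
 * Alarm callback used to defer LSC event propagation to the application
 * when link up/down delays are configured.
 */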
2663 static void
2664 bond_ethdev_delayed_lsc_propagation(void *arg)
2665 {
2666         if (arg == NULL)
2667                 return;
2668
2669         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2670                         RTE_ETH_EVENT_INTR_LSC, NULL);
2671 }
2672
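/*
 * LSC callback registered on each slave port: activates or deactivates the
 * slave, updates the current primary port and the bonded device's link
 * status, and propagates the (possibly delayed) LSC event upwards.
 */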
2673 int
2674 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2675                 void *param, void *ret_param __rte_unused)
2676 {
2677         struct rte_eth_dev *bonded_eth_dev;
2678         struct bond_dev_private *internals;
2679         struct rte_eth_link link;
2680         int rc = -1;
2681
2682         int i, valid_slave = 0;
        uint16_t active_pos;
2684         uint8_t lsc_flag = 0;
2685
2686         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2687                 return rc;
2688
        bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2690
2691         if (check_for_bonded_ethdev(bonded_eth_dev))
2692                 return rc;
2693
2694         internals = bonded_eth_dev->data->dev_private;
2695
        /* If the device isn't started, don't handle interrupts */
2697         if (!bonded_eth_dev->data->dev_started)
2698                 return rc;
2699
2700         /* verify that port_id is a valid slave of bonded port */
2701         for (i = 0; i < internals->slave_count; i++) {
2702                 if (internals->slaves[i].port_id == port_id) {
2703                         valid_slave = 1;
2704                         break;
2705                 }
2706         }
2707
2708         if (!valid_slave)
2709                 return rc;
2710
        /* Synchronize parallel invocations of the LSC callback, whether by a
         * real link event from a slave PMD or by the bonding PMD itself.
         */
2714         rte_spinlock_lock(&internals->lsc_lock);
2715
2716         /* Search for port in active port list */
2717         active_pos = find_slave_by_id(internals->active_slaves,
2718                         internals->active_slave_count, port_id);
2719
2720         rte_eth_link_get_nowait(port_id, &link);
2721         if (link.link_status) {
2722                 if (active_pos < internals->active_slave_count)
2723                         goto link_update;
2724
                /* If there are no active slave ports, make this port the primary */
2726                 if (internals->active_slave_count < 1) {
2727                         /* If first active slave, then change link status */
2728                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2729                         internals->current_primary_port = port_id;
2730                         lsc_flag = 1;
2731
2732                         mac_address_slaves_update(bonded_eth_dev);
2733                 }
2734
                /* Check link state properties if the bonded link is up */
2736                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2737                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2738                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2739                                              "for slave %d in bonding mode %d",
2740                                              port_id, internals->mode);
2741                 } else {
2742                         /* inherit slave link properties */
2743                         link_properties_set(bonded_eth_dev, &link);
2744                 }
2745
2746                 activate_slave(bonded_eth_dev, port_id);
2747
                /* If the user defined this port as primary, restore it as primary */
2749                 if (internals->user_defined_primary_port &&
2750                                 internals->primary_port == port_id)
2751                         bond_ethdev_primary_set(internals, port_id);
2752         } else {
2753                 if (active_pos == internals->active_slave_count)
2754                         goto link_update;
2755
2756                 /* Remove from active slave list */
2757                 deactivate_slave(bonded_eth_dev, port_id);
2758
2759                 if (internals->active_slave_count < 1)
2760                         lsc_flag = 1;
2761
                /* Update the primary id: take the first active slave from the list,
                 * or fall back to the configured primary port if none are active */
2764                 if (port_id == internals->current_primary_port) {
2765                         if (internals->active_slave_count > 0)
2766                                 bond_ethdev_primary_set(internals,
2767                                                 internals->active_slaves[0]);
2768                         else
2769                                 internals->current_primary_port = internals->primary_port;
2770                 }
2771         }
2772
2773 link_update:
2774         /**
2775          * Update bonded device link properties after any change to active
2776          * slaves
2777          */
2778         bond_ethdev_link_update(bonded_eth_dev, 0);
2779
2780         if (lsc_flag) {
2781                 /* Cancel any possible outstanding interrupts if delays are enabled */
2782                 if (internals->link_up_delay_ms > 0 ||
2783                         internals->link_down_delay_ms > 0)
2784                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2785                                         bonded_eth_dev);
2786
2787                 if (bonded_eth_dev->data->dev_link.link_status) {
2788                         if (internals->link_up_delay_ms > 0)
2789                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2790                                                 bond_ethdev_delayed_lsc_propagation,
2791                                                 (void *)bonded_eth_dev);
2792                         else
2793                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2794                                                 RTE_ETH_EVENT_INTR_LSC,
2795                                                 NULL);
2796
2797                 } else {
2798                         if (internals->link_down_delay_ms > 0)
2799                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2800                                                 bond_ethdev_delayed_lsc_propagation,
2801                                                 (void *)bonded_eth_dev);
2802                         else
2803                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2804                                                 RTE_ETH_EVENT_INTR_LSC,
2805                                                 NULL);
2806                 }
2807         }
2808
2809         rte_spinlock_unlock(&internals->lsc_lock);
2810
2811         return rc;
2812 }
2813
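/*
 * Update the RSS redirection table: store the new entries locally, replicate
 * them across the full-size internal table, then push the table to every
 * slave.
 */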
2814 static int
2815 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2816                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2817 {
2818         unsigned i, j;
2819         int result = 0;
2820         int slave_reta_size;
2821         unsigned reta_count;
2822         struct bond_dev_private *internals = dev->data->dev_private;
2823
2824         if (reta_size != internals->reta_size)
2825                 return -EINVAL;
2826
2827          /* Copy RETA table */
2828         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2829
2830         for (i = 0; i < reta_count; i++) {
2831                 internals->reta_conf[i].mask = reta_conf[i].mask;
2832                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2833                         if ((reta_conf[i].mask >> j) & 0x01)
2834                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2835         }
2836
2837         /* Fill rest of array */
2838         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2839                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2840                                 sizeof(internals->reta_conf[0]) * reta_count);
2841
2842         /* Propagate RETA over slaves */
2843         for (i = 0; i < internals->slave_count; i++) {
2844                 slave_reta_size = internals->slaves[i].reta_size;
2845                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2846                                 &internals->reta_conf[0], slave_reta_size);
2847                 if (result < 0)
2848                         return result;
2849         }
2850
2851         return 0;
2852 }
2853
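/* Report the locally stored RSS redirection table entries. */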
2854 static int
2855 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2856                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2857 {
2858         int i, j;
2859         struct bond_dev_private *internals = dev->data->dev_private;
2860
2861         if (reta_size != internals->reta_size)
2862                 return -EINVAL;
2863
2864          /* Copy RETA table */
2865         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2866                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2867                         if ((reta_conf[i].mask >> j) & 0x01)
2868                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2869
2870         return 0;
2871 }
2872
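/*
 * Update the RSS hash configuration: mask the requested hash types against
 * those supported by the bonded device, store the key, and propagate the
 * configuration to all slaves.
 */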
2873 static int
2874 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2875                 struct rte_eth_rss_conf *rss_conf)
2876 {
2877         int i, result = 0;
2878         struct bond_dev_private *internals = dev->data->dev_private;
2879         struct rte_eth_rss_conf bond_rss_conf;
2880
2881         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2882
2883         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2884
2885         if (bond_rss_conf.rss_hf != 0)
2886                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2887
2888         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2889                         sizeof(internals->rss_key)) {
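                /* A key length of zero selects the 40-byte default RSS key size */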
2890                 if (bond_rss_conf.rss_key_len == 0)
2891                         bond_rss_conf.rss_key_len = 40;
2892                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2893                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2894                                 internals->rss_key_len);
2895         }
2896
2897         for (i = 0; i < internals->slave_count; i++) {
2898                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2899                                 &bond_rss_conf);
2900                 if (result < 0)
2901                         return result;
2902         }
2903
2904         return 0;
2905 }
2906
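/* Report the currently configured RSS hash types and hash key. */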
2907 static int
2908 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2909                 struct rte_eth_rss_conf *rss_conf)
2910 {
2911         struct bond_dev_private *internals = dev->data->dev_private;
2912
2913         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2914         rss_conf->rss_key_len = internals->rss_key_len;
2915         if (rss_conf->rss_key)
2916                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2917
2918         return 0;
2919 }
2920
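/*
 * Set the MTU on the bonded device: first check that every slave implements
 * mtu_set, then apply the new MTU to each slave in turn.
 */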
2921 static int
2922 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2923 {
2924         struct rte_eth_dev *slave_eth_dev;
2925         struct bond_dev_private *internals = dev->data->dev_private;
2926         int ret, i;
2927
2928         rte_spinlock_lock(&internals->lock);
2929
2930         for (i = 0; i < internals->slave_count; i++) {
2931                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2932                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2933                         rte_spinlock_unlock(&internals->lock);
2934                         return -ENOTSUP;
2935                 }
2936         }
2937         for (i = 0; i < internals->slave_count; i++) {
2938                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2939                 if (ret < 0) {
2940                         rte_spinlock_unlock(&internals->lock);
2941                         return ret;
2942                 }
2943         }
2944
2945         rte_spinlock_unlock(&internals->lock);
2946         return 0;
2947 }
2948
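/* Set the default MAC address of the bonded device. */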
2949 static int
2950 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2951 {
2952         if (mac_address_set(dev, addr)) {
2953                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2954                 return -EINVAL;
2955         }
2956
2957         return 0;
2958 }
2959
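/* Expose the bonding PMD's rte_flow ops through the generic filter API. */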
2960 static int
2961 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2962                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2963 {
2964         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2965                 *(const void **)arg = &bond_flow_ops;
2966                 return 0;
2967         }
2968         return -ENOTSUP;
2969 }
2970
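/*
 * Add a MAC address: verify that every slave implements mac_addr_add and
 * mac_addr_remove, then add the address to each slave, rolling back already
 * programmed slaves on failure.
 */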
2971 static int
2972 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2973                                 __rte_unused uint32_t index, uint32_t vmdq)
2974 {
2975         struct rte_eth_dev *slave_eth_dev;
2976         struct bond_dev_private *internals = dev->data->dev_private;
2977         int ret, i;
2978
2979         rte_spinlock_lock(&internals->lock);
2980
2981         for (i = 0; i < internals->slave_count; i++) {
2982                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2983                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2984                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2985                         ret = -ENOTSUP;
2986                         goto end;
2987                 }
2988         }
2989
2990         for (i = 0; i < internals->slave_count; i++) {
2991                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2992                                 mac_addr, vmdq);
2993                 if (ret < 0) {
2994                         /* rollback */
2995                         for (i--; i >= 0; i--)
2996                                 rte_eth_dev_mac_addr_remove(
2997                                         internals->slaves[i].port_id, mac_addr);
2998                         goto end;
2999                 }
3000         }
3001
3002         ret = 0;
3003 end:
3004         rte_spinlock_unlock(&internals->lock);
3005         return ret;
3006 }
3007
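/* Remove a MAC address from every slave, if all slaves support removal. */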
3008 static void
3009 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3010 {
3011         struct rte_eth_dev *slave_eth_dev;
3012         struct bond_dev_private *internals = dev->data->dev_private;
3013         int i;
3014
3015         rte_spinlock_lock(&internals->lock);
3016
3017         for (i = 0; i < internals->slave_count; i++) {
3018                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3019                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3020                         goto end;
3021         }
3022
3023         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3024
3025         for (i = 0; i < internals->slave_count; i++)
3026                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3027                                 mac_addr);
3028
3029 end:
3030         rte_spinlock_unlock(&internals->lock);
3031 }
3032
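/* Generic ethdev operations implemented by the bonding PMD */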
3033 const struct eth_dev_ops default_dev_ops = {
3034         .dev_start            = bond_ethdev_start,
3035         .dev_stop             = bond_ethdev_stop,
3036         .dev_close            = bond_ethdev_close,
3037         .dev_configure        = bond_ethdev_configure,
3038         .dev_infos_get        = bond_ethdev_info,
3039         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3040         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3041         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3042         .rx_queue_release     = bond_ethdev_rx_queue_release,
3043         .tx_queue_release     = bond_ethdev_tx_queue_release,
3044         .link_update          = bond_ethdev_link_update,
3045         .stats_get            = bond_ethdev_stats_get,
3046         .stats_reset          = bond_ethdev_stats_reset,
3047         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3048         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3049         .reta_update          = bond_ethdev_rss_reta_update,
3050         .reta_query           = bond_ethdev_rss_reta_query,
3051         .rss_hash_update      = bond_ethdev_rss_hash_update,
3052         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3053         .mtu_set              = bond_ethdev_mtu_set,
3054         .mac_addr_set         = bond_ethdev_mac_address_set,
3055         .mac_addr_add         = bond_ethdev_mac_addr_add,
3056         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3057         .filter_ctrl          = bond_filter_ctrl
3058 };
3059
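/*
 * Allocate and initialize a bonded ethdev: reserve the ethdev entry, set up
 * default private data, apply the requested bonding mode and allocate the
 * VLAN filter bitmap. Returns the new port id, or -1 on failure.
 */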
3060 static int
3061 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3062 {
3063         const char *name = rte_vdev_device_name(dev);
3064         uint8_t socket_id = dev->device.numa_node;
3065         struct bond_dev_private *internals = NULL;
3066         struct rte_eth_dev *eth_dev = NULL;
3067         uint32_t vlan_filter_bmp_size;
3068
        /* now do all data allocation - for the eth_dev structure and internal
         * (private) data
         */
3072
3073         /* reserve an ethdev entry */
3074         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3075         if (eth_dev == NULL) {
3076                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3077                 goto err;
3078         }
3079
3080         internals = eth_dev->data->dev_private;
3081         eth_dev->data->nb_rx_queues = (uint16_t)1;
3082         eth_dev->data->nb_tx_queues = (uint16_t)1;
3083
3084         /* Allocate memory for storing MAC addresses */
3085         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3086                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3087         if (eth_dev->data->mac_addrs == NULL) {
3088                 RTE_BOND_LOG(ERR,
3089                              "Failed to allocate %u bytes needed to store MAC addresses",
3090                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3091                 goto err;
3092         }
3093
3094         eth_dev->dev_ops = &default_dev_ops;
3095         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3096
3097         rte_spinlock_init(&internals->lock);
3098         rte_spinlock_init(&internals->lsc_lock);
3099
3100         internals->port_id = eth_dev->data->port_id;
3101         internals->mode = BONDING_MODE_INVALID;
3102         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3103         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3104         internals->burst_xmit_hash = burst_xmit_l2_hash;
3105         internals->user_defined_mac = 0;
3106
3107         internals->link_status_polling_enabled = 0;
3108
3109         internals->link_status_polling_interval_ms =
3110                 DEFAULT_POLLING_INTERVAL_10_MS;
3111         internals->link_down_delay_ms = 0;
3112         internals->link_up_delay_ms = 0;
3113
3114         internals->slave_count = 0;
3115         internals->active_slave_count = 0;
3116         internals->rx_offload_capa = 0;
3117         internals->tx_offload_capa = 0;
3118         internals->rx_queue_offload_capa = 0;
3119         internals->tx_queue_offload_capa = 0;
3120         internals->candidate_max_rx_pktlen = 0;
3121         internals->max_rx_pktlen = 0;
3122
3123         /* Initially allow to choose any offload type */
3124         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3125
3126         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3127         memset(internals->slaves, 0, sizeof(internals->slaves));
3128
3129         TAILQ_INIT(&internals->flow_list);
3130         internals->flow_isolated_valid = 0;
3131
3132         /* Set mode 4 default configuration */
3133         bond_mode_8023ad_setup(eth_dev, NULL);
3134         if (bond_ethdev_mode_set(eth_dev, mode)) {
3135                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3136                                  eth_dev->data->port_id, mode);
3137                 goto err;
3138         }
3139
3140         vlan_filter_bmp_size =
3141                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3142         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3143                                                    RTE_CACHE_LINE_SIZE);
3144         if (internals->vlan_filter_bmpmem == NULL) {
3145                 RTE_BOND_LOG(ERR,
3146                              "Failed to allocate vlan bitmap for bonded device %u",
3147                              eth_dev->data->port_id);
3148                 goto err;
3149         }
3150
3151         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3152                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3153         if (internals->vlan_filter_bmp == NULL) {
3154                 RTE_BOND_LOG(ERR,
3155                              "Failed to init vlan bitmap for bonded device %u",
3156                              eth_dev->data->port_id);
3157                 rte_free(internals->vlan_filter_bmpmem);
3158                 goto err;
3159         }
3160
3161         return eth_dev->data->port_id;
3162
3163 err:
3164         rte_free(internals);
3165         if (eth_dev != NULL) {
3166                 rte_free(eth_dev->data->mac_addrs);
3167                 rte_eth_dev_release_port(eth_dev);
3168         }
3169         return -1;
3170 }
3171
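/*
 * vdev probe entry point: attach in secondary processes, otherwise parse the
 * device arguments (mode, socket id, aggregator mode) and create the bonded
 * device.
 */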
3172 static int
3173 bond_probe(struct rte_vdev_device *dev)
3174 {
3175         const char *name;
3176         struct bond_dev_private *internals;
3177         struct rte_kvargs *kvlist;
        uint8_t bonding_mode, socket_id;
3179         int  arg_count, port_id;
3180         uint8_t agg_mode;
3181         struct rte_eth_dev *eth_dev;
3182
3183         if (!dev)
3184                 return -EINVAL;
3185
3186         name = rte_vdev_device_name(dev);
3187         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3188
3189         if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3190             strlen(rte_vdev_device_args(dev)) == 0) {
3191                 eth_dev = rte_eth_dev_attach_secondary(name);
3192                 if (!eth_dev) {
3193                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3194                         return -1;
3195                 }
3196                 /* TODO: request info from primary to set up Rx and Tx */
3197                 eth_dev->dev_ops = &default_dev_ops;
3198                 eth_dev->device = &dev->device;
3199                 rte_eth_dev_probing_finish(eth_dev);
3200                 return 0;
3201         }
3202
3203         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3204                 pmd_bond_init_valid_arguments);
3205         if (kvlist == NULL)
3206                 return -1;
3207
3208         /* Parse link bonding mode */
3209         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3210                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3211                                 &bond_ethdev_parse_slave_mode_kvarg,
3212                                 &bonding_mode) != 0) {
3213                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3214                                         name);
3215                         goto parse_error;
3216                 }
3217         } else {
                RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
                                "device %s", name);
3220                 goto parse_error;
3221         }
3222
3223         /* Parse socket id to create bonding device on */
3224         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3225         if (arg_count == 1) {
3226                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3227                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3228                                 != 0) {
                        RTE_BOND_LOG(ERR, "Invalid socket id specified for "
                                        "bonded device %s", name);
3231                         goto parse_error;
3232                 }
3233         } else if (arg_count > 1) {
                RTE_BOND_LOG(ERR, "Socket id can be specified only once for "
3235                                 "bonded device %s", name);
3236                 goto parse_error;
3237         } else {
3238                 socket_id = rte_socket_id();
3239         }
3240
3241         dev->device.numa_node = socket_id;
3242
3243         /* Create link bonding eth device */
3244         port_id = bond_alloc(dev, bonding_mode);
3245         if (port_id < 0) {
                RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
                                "socket %u.", name, bonding_mode, socket_id);
3248                 goto parse_error;
3249         }
3250         internals = rte_eth_devices[port_id].data->dev_private;
3251         internals->kvlist = kvlist;
3252
3253         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3254
3255         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3256                 if (rte_kvargs_process(kvlist,
3257                                 PMD_BOND_AGG_MODE_KVARG,
3258                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3259                                 &agg_mode) != 0) {
3260                         RTE_BOND_LOG(ERR,
3261                                         "Failed to parse agg selection mode for bonded device %s",
3262                                         name);
3263                         goto parse_error;
3264                 }
3265
3266                 if (internals->mode == BONDING_MODE_8023AD)
3267                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3268                                         agg_mode);
3269         } else {
3270                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3271         }
3272
        RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
                        "socket %u.", name, port_id, bonding_mode, socket_id);
3275         return 0;
3276
3277 parse_error:
3278         rte_kvargs_free(kvlist);
3279
3280         return -1;
3281 }
3282
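/*
 * vdev remove entry point: refuse removal while slaves are still attached,
 * stop and close the device if it is running, then free its resources.
 */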
3283 static int
3284 bond_remove(struct rte_vdev_device *dev)
3285 {
3286         struct rte_eth_dev *eth_dev;
3287         struct bond_dev_private *internals;
3288         const char *name;
3289
3290         if (!dev)
3291                 return -EINVAL;
3292
3293         name = rte_vdev_device_name(dev);
3294         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3295
        /* now free all data allocations - for the eth_dev structure and
         * internal (private) data
         */
3299
3300         /* find an ethdev entry */
3301         eth_dev = rte_eth_dev_allocated(name);
3302         if (eth_dev == NULL)
3303                 return -ENODEV;
3304
3305         RTE_ASSERT(eth_dev->device == &dev->device);
3306
3307         internals = eth_dev->data->dev_private;
3308         if (internals->slave_count != 0)
3309                 return -EBUSY;
3310
3311         if (eth_dev->data->dev_started == 1) {
3312                 bond_ethdev_stop(eth_dev);
3313                 bond_ethdev_close(eth_dev);
3314         }
3315
3316         eth_dev->dev_ops = NULL;
3317         eth_dev->rx_pkt_burst = NULL;
3318         eth_dev->tx_pkt_burst = NULL;
3319
        /* Try to release the mempool used in mode 6. If the bonded device is
         * not in mode 6, freeing a NULL pointer is not a problem.
         */
3324         rte_mempool_free(internals->mode6.mempool);
3325         rte_bitmap_free(internals->vlan_filter_bmp);
3326         rte_free(internals->vlan_filter_bmpmem);
3327         rte_free(eth_dev->data->dev_private);
3328         rte_free(eth_dev->data->mac_addrs);
3329
3330         rte_eth_dev_release_port(eth_dev);
3331
3332         return 0;
3333 }
3334
/* This resolves the slave port ids once all the other pdevs and vdevs
 * have been allocated. */
3337 static int
3338 bond_ethdev_configure(struct rte_eth_dev *dev)
3339 {
3340         const char *name = dev->device->name;
3341         struct bond_dev_private *internals = dev->data->dev_private;
3342         struct rte_kvargs *kvlist = internals->kvlist;
3343         int arg_count;
3344         uint16_t port_id = dev - rte_eth_devices;
3345         uint8_t agg_mode;
3346
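        /* 40-byte default RSS key used when RSS is enabled but no key was
         * supplied; this appears to be the widely used default Toeplitz key.
         */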
3347         static const uint8_t default_rss_key[40] = {
3348                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3349                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3350                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3351                 0xBE, 0xAC, 0x01, 0xFA
3352         };
3353
3354         unsigned i, j;
3355
3356         /* If RSS is enabled, fill table and key with default values */
3357         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3358                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3359                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
                memcpy(internals->rss_key, default_rss_key, sizeof(default_rss_key));
3361
3362                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3363                         internals->reta_conf[i].mask = ~0LL;
3364                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3365                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3366                 }
3367         }
3368
3369         /* set the max_rx_pktlen */
3370         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3371
3372         /*
3373          * if no kvlist, it means that this bonded device has been created
3374          * through the bonding api.
3375          */
3376         if (!kvlist)
3377                 return 0;
3378
3379         /* Parse MAC address for bonded device */
3380         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3381         if (arg_count == 1) {
3382                 struct ether_addr bond_mac;
3383
3384                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3385                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3386                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3387                                      name);
3388                         return -1;
3389                 }
3390
3391                 /* Set MAC address */
3392                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3393                         RTE_BOND_LOG(ERR,
3394                                      "Failed to set mac address on bonded device %s",
3395                                      name);
3396                         return -1;
3397                 }
3398         } else if (arg_count > 1) {
3399                 RTE_BOND_LOG(ERR,
3400                              "MAC address can be specified only once for bonded device %s",
3401                              name);
3402                 return -1;
3403         }
3404
3405         /* Parse/set balance mode transmit policy */
3406         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3407         if (arg_count == 1) {
3408                 uint8_t xmit_policy;
3409
3410                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3411                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3412                     0) {
3413                         RTE_BOND_LOG(INFO,
3414                                      "Invalid xmit policy specified for bonded device %s",
3415                                      name);
3416                         return -1;
3417                 }
3418
                /* Set balance mode transmit policy */
3420                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3421                         RTE_BOND_LOG(ERR,
3422                                      "Failed to set balance xmit policy on bonded device %s",
3423                                      name);
3424                         return -1;
3425                 }
3426         } else if (arg_count > 1) {
3427                 RTE_BOND_LOG(ERR,
3428                              "Transmit policy can be specified only once for bonded device %s",
3429                              name);
3430                 return -1;
3431         }
3432
3433         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3434                 if (rte_kvargs_process(kvlist,
3435                                        PMD_BOND_AGG_MODE_KVARG,
3436                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3437                                        &agg_mode) != 0) {
3438                         RTE_BOND_LOG(ERR,
3439                                      "Failed to parse agg selection mode for bonded device %s",
3440                                      name);
3441                 }
3442                 if (internals->mode == BONDING_MODE_8023AD)
3443                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3444                                                               agg_mode);
3445         }
3446
3447         /* Parse/add slave ports to bonded device */
3448         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3449                 struct bond_ethdev_slave_ports slave_ports;
3450                 unsigned i;
3451
3452                 memset(&slave_ports, 0, sizeof(slave_ports));
3453
3454                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3455                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3456                         RTE_BOND_LOG(ERR,
3457                                      "Failed to parse slave ports for bonded device %s",
3458                                      name);
3459                         return -1;
3460                 }
3461
3462                 for (i = 0; i < slave_ports.slave_count; i++) {
3463                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3464                                 RTE_BOND_LOG(ERR,
3465                                              "Failed to add port %d as slave to bonded device %s",
3466                                              slave_ports.slaves[i], name);
3467                         }
3468                 }
3469
3470         } else {
3471                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3472                 return -1;
3473         }
3474
3475         /* Parse/set primary slave port id*/
3476         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3477         if (arg_count == 1) {
3478                 uint16_t primary_slave_port_id;
3479
3480                 if (rte_kvargs_process(kvlist,
3481                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3482                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3483                                        &primary_slave_port_id) < 0) {
3484                         RTE_BOND_LOG(INFO,
3485                                      "Invalid primary slave port id specified for bonded device %s",
3486                                      name);
3487                         return -1;
3488                 }
3489
                /* Set the primary slave port id */
3491                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3492                     != 0) {
3493                         RTE_BOND_LOG(ERR,
3494                                      "Failed to set primary slave port %d on bonded device %s",
3495                                      primary_slave_port_id, name);
3496                         return -1;
3497                 }
3498         } else if (arg_count > 1) {
3499                 RTE_BOND_LOG(INFO,
3500                              "Primary slave can be specified only once for bonded device %s",
3501                              name);
3502                 return -1;
3503         }
3504
3505         /* Parse link status monitor polling interval */
3506         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3507         if (arg_count == 1) {
3508                 uint32_t lsc_poll_interval_ms;
3509
3510                 if (rte_kvargs_process(kvlist,
3511                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3512                                        &bond_ethdev_parse_time_ms_kvarg,
3513                                        &lsc_poll_interval_ms) < 0) {
3514                         RTE_BOND_LOG(INFO,
3515                                      "Invalid lsc polling interval value specified for bonded"
3516                                      " device %s", name);
3517                         return -1;
3518                 }
3519
3520                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3521                     != 0) {
3522                         RTE_BOND_LOG(ERR,
3523                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3524                                      lsc_poll_interval_ms, name);
3525                         return -1;
3526                 }
3527         } else if (arg_count > 1) {
3528                 RTE_BOND_LOG(INFO,
3529                              "LSC polling interval can be specified only once for bonded"
3530                              " device %s", name);
3531                 return -1;
3532         }
3533
3534         /* Parse link up interrupt propagation delay */
3535         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3536         if (arg_count == 1) {
3537                 uint32_t link_up_delay_ms;
3538
3539                 if (rte_kvargs_process(kvlist,
3540                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3541                                        &bond_ethdev_parse_time_ms_kvarg,
3542                                        &link_up_delay_ms) < 0) {
3543                         RTE_BOND_LOG(INFO,
3544                                      "Invalid link up propagation delay value specified for"
3545                                      " bonded device %s", name);
3546                         return -1;
3547                 }
3548
                /* Set link up propagation delay */
3550                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3551                     != 0) {
3552                         RTE_BOND_LOG(ERR,
3553                                      "Failed to set link up propagation delay (%u ms) on bonded"
3554                                      " device %s", link_up_delay_ms, name);
3555                         return -1;
3556                 }
3557         } else if (arg_count > 1) {
3558                 RTE_BOND_LOG(INFO,
3559                              "Link up propagation delay can be specified only once for"
3560                              " bonded device %s", name);
3561                 return -1;
3562         }
3563
3564         /* Parse link down interrupt propagation delay */
3565         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3566         if (arg_count == 1) {
3567                 uint32_t link_down_delay_ms;
3568
3569                 if (rte_kvargs_process(kvlist,
3570                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3571                                        &bond_ethdev_parse_time_ms_kvarg,
3572                                        &link_down_delay_ms) < 0) {
3573                         RTE_BOND_LOG(INFO,
3574                                      "Invalid link down propagation delay value specified for"
3575                                      " bonded device %s", name);
3576                         return -1;
3577                 }
3578
                /* Set link down propagation delay */
3580                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3581                     != 0) {
3582                         RTE_BOND_LOG(ERR,
3583                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3584                                      link_down_delay_ms, name);
3585                         return -1;
3586                 }
3587         } else if (arg_count > 1) {
3588                 RTE_BOND_LOG(INFO,
                             "Link down propagation delay can be specified only once for bonded device %s",
3590                              name);
3591                 return -1;
3592         }
3593
3594         return 0;
3595 }
3596
3597 struct rte_vdev_driver pmd_bond_drv = {
3598         .probe = bond_probe,
3599         .remove = bond_remove,
3600 };
3601
3602 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3603 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3604
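/*
 * Example usage (illustrative only; the application, core list and PCI
 * addresses below are hypothetical):
 *
 *   testpmd -l 0-3 -n 4 \
 *       --vdev 'net_bonding0,mode=2,slave=0000:00:08.0,slave=0000:00:09.0,xmit_policy=l34'
 */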
3605 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3606         "slave=<ifc> "
3607         "primary=<ifc> "
3608         "mode=[0-6] "
3609         "xmit_policy=[l2 | l23 | l34] "
3610         "agg_mode=[count | stable | bandwidth] "
3611         "socket_id=<int> "
3612         "mac=<mac addr> "
3613         "lsc_poll_period_ms=<int> "
3614         "up_delay=<int> "
3615         "down_delay=<int>");
3616
3617 int bond_logtype;
3618
3619 RTE_INIT(bond_init_log)
3620 {
        bond_logtype = rte_log_register("pmd.net.bond");
3622         if (bond_logtype >= 0)
3623                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3624 }