/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}
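
/*
 * Worked example (illustrative, not part of the driver logic): for a
 * single-tagged IPv4 frame the caller passes *proto ==
 * rte_cpu_to_be_16(ETHER_TYPE_VLAN); get_vlan_offset() returns
 * sizeof(struct vlan_hdr) and rewrites *proto to the inner ethertype,
 * here rte_cpu_to_be_16(ETHER_TYPE_IPv4). For a QinQ frame (outer
 * 0x88A8, inner 0x8100) both tags are skipped and 8 is returned.
 * Typical usage, as in the burst handlers below:
 *
 *	uint16_t proto = eth_hdr->ether_type;
 *	size_t offset = get_vlan_offset(eth_hdr, &proto);
 *	struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
 *			((char *)(eth_hdr + 1) + offset);
 */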

static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	/* The persistent index may be stale if slaves went down since the
	 * last burst; restart from the first active slave in that case. */
	if (active_slave >= slave_count)
		active_slave = 0;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}
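
/*
 * Illustrative example: with three active slaves and
 * internals->active_slave == 1 on entry, the loop above polls the slave
 * rx queues in the order 1, 2, 0, stopping early once nb_pkts mbufs have
 * been gathered; the persistent index is then advanced so the next burst
 * starts polling at a different slave, spreading the polling load.
 */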

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}
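
/*
 * Clarifying note (illustrative usage): the predicate above is true only
 * for untagged frames (no VLAN TCI) whose ethertype is ETHER_TYPE_SLOW
 * (0x8809) and whose slow-protocol subtype is a LACP or marker PDU:
 *
 *	hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
 *	subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
 *	if (is_lacp_packets(hdr->ether_type, subtype, mbuf))
 *		bond_mode_8023ad_handle_slow_pkt(internals, slave_id, mbuf);
 */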

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities do not allow allocating additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify that every slave in the bonding device can install the flow
	 * rule and provide the extra queues needed for mode 4 optimization */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}
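
/*
 * For reference, the rule installed above matches what the following
 * testpmd flow command would create (queue index shown with an example
 * value of 1):
 *
 *	flow create <slave_port> ingress
 *		pattern eth type is 0x8809 / end
 *		actions queue index 1 / end
 *
 * i.e. every slow-protocol (LACP/marker, ethertype 0x8809) frame received
 * on the slave is steered into the dedicated control queue, so the
 * data-path rx burst never has to filter them out.
 */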

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	if (active_slave >= slave_count)
		active_slave = 0;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
						bd_rx_q->queue_id,
						bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves;
	 * one entry per packet in the burst */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
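
/*
 * Numeric example of the failure handling above (illustrative): with
 * nb_bufs == 8, if slave 0 accepts only 3 of its 5 mbufs while slave 1
 * accepts all 3 of its own, total_tx_count ends up at 6 and the 2 unsent
 * mbufs are copied into bufs[6] and bufs[7]. The caller sees a return
 * value of 6 and keeps ownership of bufs[6..7], matching the usual
 * rte_eth_tx_burst() contract for untransmitted packets.
 */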

static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
					&rte_eth_devices[internals->port_id];
	struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint16_t i, j, k;
	uint8_t subtype;

	/* Copy slave list to protect against slave up/down changes during rx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is slow packet or slave is not
			 * in collecting state or bonding interface is not in promiscuous
			 * mode and packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
				 !is_multicast_ether_addr(&hdr->d_addr) &&
				 !is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
		return;
	case ARP_OP_REPLY:
		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
		return;
	case ARP_OP_REVREQUEST:
		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
				"Reverse ARP Request");
		return;
	case ARP_OP_REVREPLY:
		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
				"Reverse ARP Reply");
		return;
	case ARP_OP_INVREQUEST:
		snprintf(buf, sizeof("Peer Identify Request"), "%s",
				"Peer Identify Request");
		return;
	case ARP_OP_INVREPLY:
		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
				"Peer Identify Reply");
		return;
	default:
		break;
	}
	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
	return;
}
#endif
#define MaxIPv4String	16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER	128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Known client: update its RX or TX packet count */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* New client: insert it into the table and count this packet.
	 * Drop the update if the table is already full. */
	if (active_clients == MAX_CLIENTS_NUMBER)
		return;
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Distribute the packets among the slaves in round-robin order */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* Increment current slave index so the next call to tx burst starts on
	 * the next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}
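
/*
 * Illustrative distribution: with three active slaves and the static
 * slave_idx == 1 on entry, a burst of five packets is assigned to slaves
 * 1, 2, 0, 1, 2; the next burst then starts at slave 0. Note that
 * slave_idx is a function-local static, so the rotation state is shared
 * by every tx queue (and every bonded device) using this handler.
 */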

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint8_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}
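
/*
 * Worked example of the final fold (illustrative numbers): with
 * l3hash == 0x11223344 and l4hash == 0, hash starts as 0x11223344;
 * hash ^= hash >> 16 gives 0x11222266, then hash ^= hash >> 8 gives
 * 0x11330044, and with slave_count == 2 the packet maps to slave
 * 0x11330044 % 2 == 0. The two shifts mix the high bytes into the low
 * byte so the modulo sees entropy from the whole 32-bit hash.
 */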

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}
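
/*
 * Illustrative numbers: on a 10G slave rte_eth_link_get_nowait() reports
 * link_speed == 10000 (Mbps), so link_bwg starts at
 * 10000 * 1000000 / 8 == 1.25e9 bytes/s before being scaled by the number
 * of elapsed REORDER_PERIOD_MS intervals. bandwidth_cmp() sorts in
 * descending order of bandwidth left, so the least-loaded slaves come
 * first in the TLB transmit order computed below.
 */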

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t i, slave_id;

	internals->slave_update_idx++;

	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void *));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void *));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to
	 * send through TLB. In the worst case every packet will be sent on
	 * one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			/* Move the unsent packets back to the end of bufs */
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][num_send + j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		/* As above, move only the unsent packets back into bufs */
		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][num_send + j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}
1275
1276 static uint16_t
1277 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1278                 uint16_t nb_bufs)
1279 {
1280         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1281         struct bond_dev_private *internals = bd_tx_q->dev_private;
1282
1283         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1284         uint16_t slave_count;
1285
1286         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1287         uint16_t dist_slave_count;
1288
1289         /* 2-D array to sort mbufs for transmission on each slave into */
1290         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1291         /* Number of mbufs for transmission on each slave */
1292         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1293         /* Mapping array generated by hash function to map mbufs to slaves */
1294         uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
1295
1296         uint16_t slave_tx_count;
1297         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1298
1299         uint16_t i;
1300
1301         if (unlikely(nb_bufs == 0))
1302                 return 0;
1303
1304         /* Copy slave list to protect against slave up/down changes during tx
1305          * bursting */
1306         slave_count = internals->active_slave_count;
1307         if (unlikely(slave_count < 1))
1308                 return 0;
1309
1310         memcpy(slave_port_ids, internals->active_slaves,
1311                         sizeof(slave_port_ids[0]) * slave_count);
1312
1313         dist_slave_count = 0;
1314         for (i = 0; i < slave_count; i++) {
1315                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1316
1317                 if (ACTOR_STATE(port, DISTRIBUTING))
1318                         dist_slave_port_ids[dist_slave_count++] =
1319                                         slave_port_ids[i];
1320         }
1321
1322         if (likely(dist_slave_count > 1)) {
1323
1324                 /*
1325                  * Populate slaves mbuf with the packets which are to be sent
1326                  * on it, selecting output slave using hash based on xmit policy
1327                  */
1328                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1329                                 bufs_slave_port_idxs);
1330
1331                 for (i = 0; i < nb_bufs; i++) {
1332                         /*
1333                          * Populate slave mbuf arrays with mbufs for that
1334                          * slave
1335                          */
1336                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1337
1338                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1339                                         bufs[i];
1340                 }
1341
1342
1343                 /* Send packet burst on each slave device */
1344                 for (i = 0; i < dist_slave_count; i++) {
1345                         if (slave_nb_bufs[i] == 0)
1346                                 continue;
1347
1348                         slave_tx_count = rte_eth_tx_burst(
1349                                         dist_slave_port_ids[i],
1350                                         bd_tx_q->queue_id, slave_bufs[i],
1351                                         slave_nb_bufs[i]);
1352
1353                         total_tx_count += slave_tx_count;
1354
1355                         /* If tx burst fails move packets to end of bufs */
1356                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1357                                 int slave_tx_fail_count = slave_nb_bufs[i] -
1358                                                 slave_tx_count;
1359                                 total_tx_fail_count += slave_tx_fail_count;
1360
1361                                 memcpy(&bufs[nb_bufs - total_tx_fail_count],
1362                                        &slave_bufs[i][slave_tx_count],
1363                                        slave_tx_fail_count * sizeof(bufs[0]));
1364                         }
1365                 }
1366         }
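
        /*
         * At this point any mbufs the slaves did not accept have been
         * compacted to the tail of bufs[], so, as with any rte_eth_tx_burst()
         * implementation, the caller retains ownership of exactly the
         * packets from index total_tx_count onwards and may retry them.
         */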
1367
1368         /* Send any LACP control packets queued by the 802.3ad state machines */
1369         for (i = 0; i < slave_count; i++) {
1370                 struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1371                 struct rte_mbuf *ctrl_pkt = NULL;
1372
1373                 if (likely(rte_ring_empty(port->tx_ring)))
1374                         continue;
1375
1376                 if (rte_ring_dequeue(port->tx_ring,
1377                                      (void **)&ctrl_pkt) != -ENOENT) {
1378                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1379                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1380                         /*
1381                          * re-enqueue LAG control plane packets to buffering
1382                          * ring if transmission fails so the packet isn't lost.
1383                          */
1384                         if (slave_tx_count != 1)
1385                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1386                 }
1387         }
1388
1389         return total_tx_count;
1390 }
1391
1392 static uint16_t
1393 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1394                 uint16_t nb_pkts)
1395 {
1396         struct bond_dev_private *internals;
1397         struct bond_tx_queue *bd_tx_q;
1398
1399         uint8_t tx_failed_flag = 0;
1400         uint16_t num_of_slaves, slaves[RTE_MAX_ETHPORTS];
1401
1402         uint16_t max_nb_of_tx_pkts = 0;
1403
1404         int slave_tx_total[RTE_MAX_ETHPORTS];
1405         int i, most_successful_tx_slave = -1;
1406
1407         bd_tx_q = (struct bond_tx_queue *)queue;
1408         internals = bd_tx_q->dev_private;
1409
1410         /* Copy slave list to protect against slave up/down changes during tx
1411          * bursting */
1412         num_of_slaves = internals->active_slave_count;
1413         memcpy(slaves, internals->active_slaves,
1414                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1415
1416         if (num_of_slaves < 1)
1417                 return 0;
1418
1419         /* Take num_of_slaves - 1 extra references so each slave can free the mbuf */
1420         for (i = 0; i < nb_pkts; i++)
1421                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1422
1423         /* Transmit burst on each active slave */
1424         for (i = 0; i < num_of_slaves; i++) {
1425                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1426                                         bufs, nb_pkts);
1427
1428                 if (unlikely(slave_tx_total[i] < nb_pkts))
1429                         tx_failed_flag = 1;
1430
1431                 /* Record the count and index of the slave that transmits the
1432                  * largest number of packets */
1433                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1434                         max_nb_of_tx_pkts = slave_tx_total[i];
1435                         most_successful_tx_slave = i;
1436                 }
1437         }
1438
1439         /* if slaves fail to transmit packets from burst, the calling application
1440          * is not expected to know about multiple references to packets so we must
1441          * handle failures of all packets except those of the most successful slave
1442          */
1443         if (unlikely(tx_failed_flag))
1444                 for (i = 0; i < num_of_slaves; i++)
1445                         if (i != most_successful_tx_slave)
1446                                 while (slave_tx_total[i] < nb_pkts)
1447                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1448
1449         return max_nb_of_tx_pkts;
1450 }
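
/*
 * Worked example of the reference accounting above: with three slaves each
 * mbuf's refcnt is raised by two, so a packet accepted by all three slaves is
 * freed once per slave PMD. If, out of 32 packets, slave A accepts 10, slave B
 * accepts 20 and slave C (the most successful) accepts all 32, the cleanup
 * loop frees packets 10..31 once for A and packets 20..31 once for B, while
 * the references belonging to C are released by C's PMD after transmission.
 */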
1451
1452 void
1453 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1454 {
1455         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1456
1457         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1458                 /**
1459                  * If in mode 4 then save the link properties of the first
1460                  * slave; all subsequent slaves must match these properties
1461                  */
1462                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1463
1464                 bond_link->link_autoneg = slave_link->link_autoneg;
1465                 bond_link->link_duplex = slave_link->link_duplex;
1466                 bond_link->link_speed = slave_link->link_speed;
1467         } else {
1468                 /**
1469                  * In any other mode the link properties are reset to the
1470                  * defaults: autonegotiation and full duplex
1471                  */
1472                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1473                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1474         }
1475 }
1476
1477 int
1478 link_properties_valid(struct rte_eth_dev *ethdev,
1479                 struct rte_eth_link *slave_link)
1480 {
1481         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1482
1483         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1484                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1485
1486                 if (bond_link->link_duplex != slave_link->link_duplex ||
1487                         bond_link->link_autoneg != slave_link->link_autoneg ||
1488                         bond_link->link_speed != slave_link->link_speed)
1489                         return -1;
1490         }
1491
1492         return 0;
1493 }
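
/*
 * Note: 802.3ad requires every link in an aggregation group to run at the
 * same speed and in full duplex, which is why mode 4 pins the bond to the
 * first slave's link properties and rejects mismatching slaves here.
 */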
1494
1495 int
1496 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1497 {
1498         struct ether_addr *mac_addr;
1499
1500         if (eth_dev == NULL) {
1501                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1502                 return -1;
1503         }
1504
1505         if (dst_mac_addr == NULL) {
1506                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1507                 return -1;
1508         }
1509
1510         mac_addr = eth_dev->data->mac_addrs;
1511
1512         ether_addr_copy(mac_addr, dst_mac_addr);
1513         return 0;
1514 }
1515
1516 int
1517 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1518 {
1519         struct ether_addr *mac_addr;
1520
1521         if (eth_dev == NULL) {
1522                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1523                 return -1;
1524         }
1525
1526         if (new_mac_addr == NULL) {
1527                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1528                 return -1;
1529         }
1530
1531         mac_addr = eth_dev->data->mac_addrs;
1532
1533         /* If new MAC is different from the current MAC then update */
1534         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1535                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1536
1537         return 0;
1538 }
1539
1540 static const struct ether_addr null_mac_addr;
1541
1542 /*
1543  * Add the bonded device's secondary MAC addresses (slots 1..N) to the slave
1544  */
1545 int
1546 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1547                 uint16_t slave_port_id)
1548 {
1549         int i, ret;
1550         struct ether_addr *mac_addr;
1551
1552         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1553                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1554                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1555                         break;
1556
1557                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1558                 if (ret < 0) {
1559                         /* rollback */
1560                         for (i--; i > 0; i--)
1561                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1562                                         &bonded_eth_dev->data->mac_addrs[i]);
1563                         return ret;
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
1570 /*
1571  * Remove additional MAC addresses from the slave
1572  */
1573 int
1574 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1575                 uint16_t slave_port_id)
1576 {
1577         int i, rc, ret;
1578         struct ether_addr *mac_addr;
1579
1580         rc = 0;
1581         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1582                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1583                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1584                         break;
1585
1586                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1587                 /* save only the first error */
1588                 if (ret < 0 && rc == 0)
1589                         rc = ret;
1590         }
1591
1592         return rc;
1593 }
1594
1595 int
1596 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1597 {
1598         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1599         int i;
1600
1601         /* Update slave devices MAC addresses */
1602         if (internals->slave_count < 1)
1603                 return -1;
1604
1605         switch (internals->mode) {
1606         case BONDING_MODE_ROUND_ROBIN:
1607         case BONDING_MODE_BALANCE:
1608         case BONDING_MODE_BROADCAST:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (rte_eth_dev_default_mac_addr_set(
1611                                         internals->slaves[i].port_id,
1612                                         bonded_eth_dev->data->mac_addrs)) {
1613                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1614                                                 internals->slaves[i].port_id);
1615                                 return -1;
1616                         }
1617                 }
1618                 break;
1619         case BONDING_MODE_8023AD:
1620                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1621                 break;
1622         case BONDING_MODE_ACTIVE_BACKUP:
1623         case BONDING_MODE_TLB:
1624         case BONDING_MODE_ALB:
1625         default:
1626                 for (i = 0; i < internals->slave_count; i++) {
1627                         if (internals->slaves[i].port_id ==
1628                                         internals->current_primary_port) {
1629                                 if (rte_eth_dev_default_mac_addr_set(
1630                                                 internals->current_primary_port,
1631                                                 bonded_eth_dev->data->mac_addrs)) {
1632                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1633                                                         internals->current_primary_port);
1634                                         return -1;
1635                                 }
1636                         } else {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->slaves[i].port_id,
1639                                                 &internals->slaves[i].persisted_mac_addr)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->slaves[i].port_id);
1642                                         return -1;
1643                                 }
1644                         }
1645                 }
1646         }
1647
1648         return 0;
1649 }
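
/*
 * Summary of the per-mode MAC policy implemented above: in round-robin,
 * balance and broadcast modes every slave carries the bonded device's MAC;
 * in mode 4 the 802.3ad code manages slave MACs itself; in active-backup,
 * TLB and ALB only the current primary carries the bonded MAC while the
 * remaining slaves keep their original (persisted) addresses.
 */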
1650
1651 int
1652 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1653 {
1654         struct bond_dev_private *internals;
1655
1656         internals = eth_dev->data->dev_private;
1657
1658         switch (mode) {
1659         case BONDING_MODE_ROUND_ROBIN:
1660                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1661                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1662                 break;
1663         case BONDING_MODE_ACTIVE_BACKUP:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1666                 break;
1667         case BONDING_MODE_BALANCE:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_BROADCAST:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1674                 break;
1675         case BONDING_MODE_8023AD:
1676                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1677                         return -1;
1678
1679                 if (internals->mode4.dedicated_queues.enabled == 0) {
1680                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1681                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1682                         RTE_BOND_LOG(WARNING,
1683                                 "Using mode 4, it is necessary to do TX burst "
1684                                 "and RX burst at least every 100ms.");
1685                 } else {
1686                         /* Use flow director's optimization */
1687                         eth_dev->rx_pkt_burst =
1688                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1689                         eth_dev->tx_pkt_burst =
1690                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1691                 }
1692                 break;
1693         case BONDING_MODE_TLB:
1694                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1695                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1696                 break;
1697         case BONDING_MODE_ALB:
1698                 if (bond_mode_alb_enable(eth_dev) != 0)
1699                         return -1;
1700
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1703                 break;
1704         default:
1705                 return -1;
1706         }
1707
1708         internals->mode = mode;
1709
1710         return 0;
1711 }
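
/*
 * Illustrative application-side sketch (not part of this driver) of how a
 * bonded device ends up in one of the modes handled above, using the public
 * API declared in rte_eth_bond.h; the slave port ids 0 and 1 are
 * hypothetical:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_8023AD, rte_socket_id());
 *	if (bond_port < 0)
 *		rte_exit(EXIT_FAILURE, "cannot create bonded device\n");
 *	rte_eth_bond_slave_add(bond_port, 0);
 *	rte_eth_bond_slave_add(bond_port, 1);
 *
 * The same can be done without code via EAL device arguments, e.g.
 * --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1'.
 */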
1712
1713
1714 static int
1715 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1716                 struct rte_eth_dev *slave_eth_dev)
1717 {
1718         int errval = 0;
1719         struct bond_dev_private *internals = (struct bond_dev_private *)
1720                 bonded_eth_dev->data->dev_private;
1721         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1722
1723         if (port->slow_pool == NULL) {
1724                 char mem_name[256];
1725                 int slave_id = slave_eth_dev->data->port_id;
1726
1727                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1728                                 slave_id);
1729                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1730                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1731                         slave_eth_dev->data->numa_node);
1732
1733                 /* Any memory allocation failure in initialization is critical because
1734                  * resources can't be freed, so reinitialization is impossible. */
1735                 if (port->slow_pool == NULL) {
1736                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1737                                 slave_id, mem_name, rte_strerror(rte_errno));
1738                 }
1739         }
1740
1741         if (internals->mode4.dedicated_queues.enabled == 1) {
1742                 /* Configure slow Rx queue */
1743
1744                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1745                                 internals->mode4.dedicated_queues.rx_qid, 128,
1746                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1747                                 NULL, port->slow_pool);
1748                 if (errval != 0) {
1749                         RTE_BOND_LOG(ERR,
1750                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1751                                         slave_eth_dev->data->port_id,
1752                                         internals->mode4.dedicated_queues.rx_qid,
1753                                         errval);
1754                         return errval;
1755                 }
1756
1757                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1758                                 internals->mode4.dedicated_queues.tx_qid, 512,
1759                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1760                                 NULL);
1761                 if (errval != 0) {
1762                         RTE_BOND_LOG(ERR,
1763                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1764                                 slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid,
1766                                 errval);
1767                         return errval;
1768                 }
1769         }
1770         return 0;
1771 }
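
/*
 * Note: the "slow" Rx/Tx queue configured above exists only when the
 * application has called rte_eth_bond_8023ad_dedicated_queues_enable()
 * (declared in rte_eth_bond_8023ad.h) before starting the bonded device.
 * It is allocated one past the queues visible to the application and
 * carries only LACPDUs; on the Rx side traffic is steered to it by the
 * rte_flow rule installed from slave_configure() below.
 */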
1772
1773 int
1774 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1775                 struct rte_eth_dev *slave_eth_dev)
1776 {
1777         struct bond_rx_queue *bd_rx_q;
1778         struct bond_tx_queue *bd_tx_q;
1779         uint16_t nb_rx_queues;
1780         uint16_t nb_tx_queues;
1781
1782         int errval;
1783         uint16_t q_id;
1784         struct rte_flow_error flow_error;
1785
1786         struct bond_dev_private *internals = (struct bond_dev_private *)
1787                 bonded_eth_dev->data->dev_private;
1788
1789         /* Stop slave */
1790         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1791
1792         /* Enable interrupts on slave device if supported */
1793         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1794                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1795
1796         /* If RSS is enabled for bonding, try to enable it for slaves  */
1797         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1798                 if (internals->rss_key_len != 0) {
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1800                                         internals->rss_key_len;
1801                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1802                                         internals->rss_key;
1803                 } else {
1804                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1805                 }
1806
1807                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1808                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1809                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1810                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1811         }
1812
1813         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1814                         DEV_RX_OFFLOAD_VLAN_FILTER)
1815                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1816                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1817         else
1818                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1819                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1820
1821         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1822         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1823
1824         if (internals->mode == BONDING_MODE_8023AD) {
1825                 if (internals->mode4.dedicated_queues.enabled == 1) {
1826                         nb_rx_queues++;
1827                         nb_tx_queues++;
1828                 }
1829         }
1830
1831         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1832                                      bonded_eth_dev->data->mtu);
1833         if (errval != 0 && errval != -ENOTSUP) {
1834                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1835                                 slave_eth_dev->data->port_id, errval);
1836                 return errval;
1837         }
1838
1839         /* Configure device */
1840         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1841                         nb_rx_queues, nb_tx_queues,
1842                         &(slave_eth_dev->data->dev_conf));
1843         if (errval != 0) {
1844                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1845                                 slave_eth_dev->data->port_id, errval);
1846                 return errval;
1847         }
1848
1849         /* Setup Rx Queues */
1850         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1851                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1852
1853                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1854                                 bd_rx_q->nb_rx_desc,
1855                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1856                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1857                 if (errval != 0) {
1858                         RTE_BOND_LOG(ERR,
1859                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1860                                         slave_eth_dev->data->port_id, q_id, errval);
1861                         return errval;
1862                 }
1863         }
1864
1865         /* Setup Tx Queues */
1866         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1867                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1868
1869                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1870                                 bd_tx_q->nb_tx_desc,
1871                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1872                                 &bd_tx_q->tx_conf);
1873                 if (errval != 0) {
1874                         RTE_BOND_LOG(ERR,
1875                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1876                                 slave_eth_dev->data->port_id, q_id, errval);
1877                         return errval;
1878                 }
1879         }
1880
1881         if (internals->mode == BONDING_MODE_8023AD &&
1882                         internals->mode4.dedicated_queues.enabled == 1) {
1883                 errval = slave_configure_slow_queue(bonded_eth_dev,
1884                                 slave_eth_dev);
1885                 if (errval != 0)
1886                         return errval;
1887
1888                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1889                                 slave_eth_dev->data->port_id) != 0) {
1890                         RTE_BOND_LOG(ERR,
1891                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1892                                 slave_eth_dev->data->port_id);
1893                         return -1;
1894                 }
1894
1895                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1896                         rte_flow_destroy(slave_eth_dev->data->port_id,
1897                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1898                                         &flow_error);
1899
1900                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1901                                 slave_eth_dev->data->port_id);
1902         }
1903
1904         /* Start device */
1905         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1906         if (errval != 0) {
1907                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1908                                 slave_eth_dev->data->port_id, errval);
1909                 return -1;
1910         }
1911
1912         /* If RSS is enabled for bonding, synchronize RETA */
1913         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1914                 int i;
1915                 struct bond_dev_private *internals;
1916
1917                 internals = bonded_eth_dev->data->dev_private;
1918
1919                 for (i = 0; i < internals->slave_count; i++) {
1920                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1921                                 errval = rte_eth_dev_rss_reta_update(
1922                                                 slave_eth_dev->data->port_id,
1923                                                 &internals->reta_conf[0],
1924                                                 internals->slaves[i].reta_size);
1925                                 if (errval != 0) {
1926                                         RTE_BOND_LOG(WARNING,
1927                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1928                                                      " RSS Configuration for bonding may be inconsistent.",
1929                                                      slave_eth_dev->data->port_id, errval);
1930                                 }
1931                                 break;
1932                         }
1933                 }
1934         }
1935
1936         /* If lsc interrupt is set, check initial slave's link status */
1937         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1938                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1939                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1940                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1941                         NULL);
1942         }
1943
1944         return 0;
1945 }
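
/*
 * Note: the (re)configuration sequence above follows the usual ethdev
 * lifecycle: the slave is stopped, its configuration is aligned with the
 * bonded device (LSC interrupts, RSS, VLAN filtering, MTU, queue counts),
 * its queues are set up, any 802.3ad slow queue and flow rules are applied,
 * and only then is the slave restarted and its RETA synchronized.
 */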
1946
1947 void
1948 slave_remove(struct bond_dev_private *internals,
1949                 struct rte_eth_dev *slave_eth_dev)
1950 {
1951         uint16_t i;
1952
1953         for (i = 0; i < internals->slave_count; i++)
1954                 if (internals->slaves[i].port_id ==
1955                                 slave_eth_dev->data->port_id)
1956                         break;
1957
1958         if (i < (internals->slave_count - 1)) {
1959                 struct rte_flow *flow;
1960
1961                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1962                                 sizeof(internals->slaves[0]) *
1963                                 (internals->slave_count - i - 1));
1964                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1965                         memmove(&flow->flows[i], &flow->flows[i + 1],
1966                                 sizeof(flow->flows[0]) *
1967                                 (internals->slave_count - i - 1));
1968                         flow->flows[internals->slave_count - 1] = NULL;
1969                 }
1970         }
1971
1972         internals->slave_count--;
1973
1974         /* force reconfiguration of slave interfaces */
1975         _rte_eth_dev_reset(slave_eth_dev);
1976 }
1977
1978 static void
1979 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1980
1981 void
1982 slave_add(struct bond_dev_private *internals,
1983                 struct rte_eth_dev *slave_eth_dev)
1984 {
1985         struct bond_slave_details *slave_details =
1986                         &internals->slaves[internals->slave_count];
1987
1988         slave_details->port_id = slave_eth_dev->data->port_id;
1989         slave_details->last_link_status = 0;
1990
1991         /* Mark slave devices that don't support interrupts so we can
1992          * compensate when we start the bond
1993          */
1994         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1995                 slave_details->link_status_poll_enabled = 1;
1996         }
1997
1998         slave_details->link_status_wait_to_complete = 0;
1999         /* Save the slave's MAC address so it can be restored on removal */
2000         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2001                         sizeof(struct ether_addr));
2002 }
2003
2004 void
2005 bond_ethdev_primary_set(struct bond_dev_private *internals,
2006                 uint16_t slave_port_id)
2007 {
2008         int i;
2009
2010         if (internals->active_slave_count < 1)
2011                 internals->current_primary_port = slave_port_id;
2012         else
2013                 /* Search bonded device slave ports for new proposed primary port */
2014                 for (i = 0; i < internals->active_slave_count; i++) {
2015                         if (internals->active_slaves[i] == slave_port_id)
2016                                 internals->current_primary_port = slave_port_id;
2017                 }
2018 }
2019
2020 static void
2021 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2022
2023 static int
2024 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals;
2027         int i;
2028
2029         /* slave eth dev will be started by bonded device */
2030         if (check_for_bonded_ethdev(eth_dev)) {
2031                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2032                                 eth_dev->data->port_id);
2033                 return -1;
2034         }
2035
2036         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2037         eth_dev->data->dev_started = 1;
2038
2039         internals = eth_dev->data->dev_private;
2040
2041         if (internals->slave_count == 0) {
2042                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2043                 goto out_err;
2044         }
2045
2046         if (internals->user_defined_mac == 0) {
2047                 struct ether_addr *new_mac_addr = NULL;
2048
2049                 for (i = 0; i < internals->slave_count; i++)
2050                         if (internals->slaves[i].port_id == internals->primary_port)
2051                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2052
2053                 if (new_mac_addr == NULL)
2054                         goto out_err;
2055
2056                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2057                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2058                                         eth_dev->data->port_id);
2059                         goto out_err;
2060                 }
2061         }
2062
2063         /* If bonded device is configured in promiscuous mode then re-apply config */
2064         if (internals->promiscuous_en)
2065                 bond_ethdev_promiscuous_enable(eth_dev);
2066
2067         if (internals->mode == BONDING_MODE_8023AD) {
2068                 if (internals->mode4.dedicated_queues.enabled == 1) {
2069                         internals->mode4.dedicated_queues.rx_qid =
2070                                         eth_dev->data->nb_rx_queues;
2071                         internals->mode4.dedicated_queues.tx_qid =
2072                                         eth_dev->data->nb_tx_queues;
2073                 }
2074         }
2075
2077         /* Reconfigure each slave device if starting bonded device */
2078         for (i = 0; i < internals->slave_count; i++) {
2079                 struct rte_eth_dev *slave_ethdev =
2080                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2081                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2082                         RTE_BOND_LOG(ERR,
2083                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2084                                 eth_dev->data->port_id,
2085                                 internals->slaves[i].port_id);
2086                         goto out_err;
2087                 }
2088                 /* We will need to poll for link status if any slave doesn't
2089                  * support interrupts
2090                  */
2091                 if (internals->slaves[i].link_status_poll_enabled)
2092                         internals->link_status_polling_enabled = 1;
2093         }
2094
2095         /* start polling if needed */
2096         if (internals->link_status_polling_enabled) {
2097                 rte_eal_alarm_set(
2098                         internals->link_status_polling_interval_ms * 1000,
2099                         bond_ethdev_slave_link_status_change_monitor,
2100                         (void *)&rte_eth_devices[internals->port_id]);
2101         }
2102
2103         /* Update all slave devices' MAC addresses */
2104         if (mac_address_slaves_update(eth_dev) != 0)
2105                 goto out_err;
2106
2107         if (internals->user_defined_primary_port)
2108                 bond_ethdev_primary_set(internals, internals->primary_port);
2109
2110         if (internals->mode == BONDING_MODE_8023AD)
2111                 bond_mode_8023ad_start(eth_dev);
2112
2113         if (internals->mode == BONDING_MODE_TLB ||
2114                         internals->mode == BONDING_MODE_ALB)
2115                 bond_tlb_enable(internals);
2116
2117         return 0;
2118
2119 out_err:
2120         eth_dev->data->dev_started = 0;
2121         return -1;
2122 }
2123
2124 static void
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 {
2127         uint16_t i;
2128
2129         if (dev->data->rx_queues != NULL) {
2130                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131                         rte_free(dev->data->rx_queues[i]);
2132                         dev->data->rx_queues[i] = NULL;
2133                 }
2134                 dev->data->nb_rx_queues = 0;
2135         }
2136
2137         if (dev->data->tx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139                         rte_free(dev->data->tx_queues[i]);
2140                         dev->data->tx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_tx_queues = 0;
2143         }
2144 }
2145
2146 void
2147 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2148 {
2149         struct bond_dev_private *internals = eth_dev->data->dev_private;
2150         uint16_t i;
2151
2152         if (internals->mode == BONDING_MODE_8023AD) {
2153                 struct port *port;
2154                 void *pkt = NULL;
2155
2156                 bond_mode_8023ad_stop(eth_dev);
2157
2158                 /* Discard all messages to/from mode 4 state machines */
2159                 for (i = 0; i < internals->active_slave_count; i++) {
2160                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2161
2162                         RTE_ASSERT(port->rx_ring != NULL);
2163                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2164                                 rte_pktmbuf_free(pkt);
2165
2166                         RTE_ASSERT(port->tx_ring != NULL);
2167                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169                 }
2170         }
2171
2172         if (internals->mode == BONDING_MODE_TLB ||
2173                         internals->mode == BONDING_MODE_ALB) {
2174                 bond_tlb_disable(internals);
2175                 for (i = 0; i < internals->active_slave_count; i++)
2176                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2177         }
2178
2179         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2180         eth_dev->data->dev_started = 0;
2181
2182         internals->link_status_polling_enabled = 0;
2183         for (i = 0; i < internals->slave_count; i++) {
2184                 uint16_t slave_id = internals->slaves[i].port_id;
2185                 if (find_slave_by_id(internals->active_slaves,
2186                                 internals->active_slave_count, slave_id) !=
2187                                                 internals->active_slave_count) {
2188                         internals->slaves[i].last_link_status = 0;
2189                         rte_eth_dev_stop(slave_id);
2190                         deactivate_slave(eth_dev, slave_id);
2191                 }
2192         }
2193 }
2194
2195 void
2196 bond_ethdev_close(struct rte_eth_dev *dev)
2197 {
2198         struct bond_dev_private *internals = dev->data->dev_private;
2199         uint16_t bond_port_id = internals->port_id;
2200         int skipped = 0;
2201         struct rte_flow_error ferror;
2202
2203         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2204         while (internals->slave_count != skipped) {
2205                 uint16_t port_id = internals->slaves[skipped].port_id;
2206
2207                 rte_eth_dev_stop(port_id);
2208
2209                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2210                         RTE_BOND_LOG(ERR,
2211                                      "Failed to remove port %d from bonded device %s",
2212                                      port_id, dev->device->name);
2213                         skipped++;
2214                 }
2215         }
2216         bond_flow_ops.flush(dev, &ferror);
2217         bond_ethdev_free_queues(dev);
2218         rte_bitmap_reset(internals->vlan_filter_bmp);
2219 }
2220
2221 /* forward declaration */
2222 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2223
2224 static void
2225 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2226 {
2227         struct bond_dev_private *internals = dev->data->dev_private;
2228
2229         uint16_t max_nb_rx_queues = UINT16_MAX;
2230         uint16_t max_nb_tx_queues = UINT16_MAX;
2231
2232         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2233
2234         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2235                         internals->candidate_max_rx_pktlen :
2236                         ETHER_MAX_JUMBO_FRAME_LEN;
2237
2238         /* The maximum number of tx/rx queues that the bonded device can
2239          * support is the minimum across all slaves, as every slave must be
2240          * able to support the bonded device's queue configuration.
2241          */
2242         if (internals->slave_count > 0) {
2243                 struct rte_eth_dev_info slave_info;
2244                 uint16_t idx;
2245
2246                 for (idx = 0; idx < internals->slave_count; idx++) {
2247                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2248                                         &slave_info);
2249
2250                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2251                                 max_nb_rx_queues = slave_info.max_rx_queues;
2252
2253                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2254                                 max_nb_tx_queues = slave_info.max_tx_queues;
2255                 }
2256         }
2257
2258         dev_info->max_rx_queues = max_nb_rx_queues;
2259         dev_info->max_tx_queues = max_nb_tx_queues;
2260
2261         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2262                sizeof(dev_info->default_rxconf));
2263         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2264                sizeof(dev_info->default_txconf));
2265
2266         memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
2267                sizeof(dev_info->rx_desc_lim));
2268         memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
2269                sizeof(dev_info->tx_desc_lim));
2270
2271         /**
2272          * If dedicated hw queues enabled for link bonding device in LACP mode
2273          * then we need to reduce the maximum number of data path queues by 1.
2274          */
2275         if (internals->mode == BONDING_MODE_8023AD &&
2276                 internals->mode4.dedicated_queues.enabled == 1) {
2277                 dev_info->max_rx_queues--;
2278                 dev_info->max_tx_queues--;
2279         }
2280
2281         dev_info->min_rx_bufsize = 0;
2282
2283         dev_info->rx_offload_capa = internals->rx_offload_capa;
2284         dev_info->tx_offload_capa = internals->tx_offload_capa;
2285         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2286         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2287         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2288
2289         dev_info->reta_size = internals->reta_size;
2290 }
2291
2292 static int
2293 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2294 {
2295         int res;
2296         uint16_t i;
2297         struct bond_dev_private *internals = dev->data->dev_private;
2298
2299         /* don't do this while a slave is being added */
2300         rte_spinlock_lock(&internals->lock);
2301
2302         if (on)
2303                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2304         else
2305                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2306
2307         for (i = 0; i < internals->slave_count; i++) {
2308                 uint16_t port_id = internals->slaves[i].port_id;
2309
2310                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2311                 if (res == -ENOTSUP)
2312                         RTE_BOND_LOG(WARNING,
2313                                      "Setting VLAN filter on slave port %u not supported.",
2314                                      port_id);
2315         }
2316
2317         rte_spinlock_unlock(&internals->lock);
2318         return 0;
2319 }
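
/*
 * The bitmap updated above records the configured VLAN filters so that they
 * can be replayed onto slaves added to the bond later (see the slave add
 * path in rte_eth_bond_api.c), which is also why the operation is
 * serialized against slave addition with internals->lock.
 */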
2320
2321 static int
2322 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2323                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2324                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2325 {
2326         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2327                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2328                                         0, dev->data->numa_node);
2329         if (bd_rx_q == NULL)
2330                 return -1;
2331
2332         bd_rx_q->queue_id = rx_queue_id;
2333         bd_rx_q->dev_private = dev->data->dev_private;
2334
2335         bd_rx_q->nb_rx_desc = nb_rx_desc;
2336
2337         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2338         bd_rx_q->mb_pool = mb_pool;
2339
2340         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2341
2342         return 0;
2343 }
2344
2345 static int
2346 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2347                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2348                 const struct rte_eth_txconf *tx_conf)
2349 {
2350         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2351                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2352                                         0, dev->data->numa_node);
2353
2354         if (bd_tx_q == NULL)
2355                 return -1;
2356
2357         bd_tx_q->queue_id = tx_queue_id;
2358         bd_tx_q->dev_private = dev->data->dev_private;
2359
2360         bd_tx_q->nb_tx_desc = nb_tx_desc;
2361         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2362
2363         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2364
2365         return 0;
2366 }
2367
2368 static void
2369 bond_ethdev_rx_queue_release(void *queue)
2370 {
2371         if (queue == NULL)
2372                 return;
2373
2374         rte_free(queue);
2375 }
2376
2377 static void
2378 bond_ethdev_tx_queue_release(void *queue)
2379 {
2380         if (queue == NULL)
2381                 return;
2382
2383         rte_free(queue);
2384 }
2385
2386 static void
2387 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2388 {
2389         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2390         struct bond_dev_private *internals;
2391
2392         /* Default value for polling slave found is true as we don't want to
2393          * disable the polling thread if we cannot get the lock */
2394         int i, polling_slave_found = 1;
2395
2396         if (cb_arg == NULL)
2397                 return;
2398
2399         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2400         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2401
2402         if (!bonded_ethdev->data->dev_started ||
2403                 !internals->link_status_polling_enabled)
2404                 return;
2405
2406         /* If device is currently being configured then don't check slaves link
2407          * status, wait until next period */
2408         if (rte_spinlock_trylock(&internals->lock)) {
2409                 if (internals->slave_count > 0)
2410                         polling_slave_found = 0;
2411
2412                 for (i = 0; i < internals->slave_count; i++) {
2413                         if (!internals->slaves[i].link_status_poll_enabled)
2414                                 continue;
2415
2416                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2417                         polling_slave_found = 1;
2418
2419                         /* Update slave link status */
2420                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2421                                         internals->slaves[i].link_status_wait_to_complete);
2422
2423                         /* if link status has changed since last checked then call lsc
2424                          * event callback */
2425                         if (slave_ethdev->data->dev_link.link_status !=
2426                                         internals->slaves[i].last_link_status) {
2427                                 internals->slaves[i].last_link_status =
2428                                                 slave_ethdev->data->dev_link.link_status;
2429
2430                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2431                                                 RTE_ETH_EVENT_INTR_LSC,
2432                                                 &bonded_ethdev->data->port_id,
2433                                                 NULL);
2434                         }
2435                 }
2436                 rte_spinlock_unlock(&internals->lock);
2437         }
2438
2439         if (polling_slave_found)
2440                 /* Set alarm to continue monitoring link status of slave ethdevs */
2441                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2442                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2443 }
2444
2445 static int
2446 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2447 {
2448         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2449
2450         struct bond_dev_private *bond_ctx;
2451         struct rte_eth_link slave_link;
2452
2453         uint32_t idx;
2454
2455         bond_ctx = ethdev->data->dev_private;
2456
2457         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2458
2459         if (ethdev->data->dev_started == 0 ||
2460                         bond_ctx->active_slave_count == 0) {
2461                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2462                 return 0;
2463         }
2464
2465         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2466
2467         if (wait_to_complete)
2468                 link_update = rte_eth_link_get;
2469         else
2470                 link_update = rte_eth_link_get_nowait;
2471
2472         switch (bond_ctx->mode) {
2473         case BONDING_MODE_BROADCAST:
2474                 /**
2475                  * Setting link speed to UINT32_MAX to ensure we pick up the
2476                  * value of the first active slave
2477                  */
2478                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2479
2480                 /**
2481                  * link speed is minimum value of all the slaves link speed as
2482                  * packet loss will occur on this slave if transmission at rates
2483                  * greater than this are attempted
2484                  */
2485                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2486                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2487
2488                         if (slave_link.link_speed <
2489                                         ethdev->data->dev_link.link_speed)
2490                                 ethdev->data->dev_link.link_speed =
2491                                                 slave_link.link_speed;
2492                 }
2493                 break;
2494         case BONDING_MODE_ACTIVE_BACKUP:
2495                 /* Current primary slave */
2496                 link_update(bond_ctx->current_primary_port, &slave_link);
2497
2498                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2499                 break;
2500         case BONDING_MODE_8023AD:
2501                 ethdev->data->dev_link.link_autoneg =
2502                                 bond_ctx->mode4.slave_link.link_autoneg;
2503                 ethdev->data->dev_link.link_duplex =
2504                                 bond_ctx->mode4.slave_link.link_duplex;
2505                 /* fall through to update link speed */
2506         case BONDING_MODE_ROUND_ROBIN:
2507         case BONDING_MODE_BALANCE:
2508         case BONDING_MODE_TLB:
2509         case BONDING_MODE_ALB:
2510         default:
2511                 /**
2512                  * In these modes the maximum theoretical link speed is the
2513                  * sum of all the slaves' link speeds
2514                  */
2515                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2516
2517                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2518                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2519
2520                         ethdev->data->dev_link.link_speed +=
2521                                         slave_link.link_speed;
2522                 }
2523         }
2524
2526         return 0;
2527 }
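
/*
 * Worked example of the speed reporting above: with one 10G and one 1G
 * active slave, broadcast mode reports 1G (the minimum, as every packet
 * must traverse all slaves), active-backup reports the current primary's
 * speed, and the aggregating modes (round-robin, balance, 802.3ad, TLB,
 * ALB) report 11G (the sum).
 */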
2528
2530 static int
2531 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2532 {
2533         struct bond_dev_private *internals = dev->data->dev_private;
2534         struct rte_eth_stats slave_stats;
2535         int i, j;
2536
2537         for (i = 0; i < internals->slave_count; i++) {
2538                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2539
2540                 stats->ipackets += slave_stats.ipackets;
2541                 stats->opackets += slave_stats.opackets;
2542                 stats->ibytes += slave_stats.ibytes;
2543                 stats->obytes += slave_stats.obytes;
2544                 stats->imissed += slave_stats.imissed;
2545                 stats->ierrors += slave_stats.ierrors;
2546                 stats->oerrors += slave_stats.oerrors;
2547                 stats->rx_nombuf += slave_stats.rx_nombuf;
2548
2549                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2550                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2551                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2552                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2553                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2554                         stats->q_errors[j] += slave_stats.q_errors[j];
2555                 }
2556
2557         }
2558
2559         return 0;
2560 }
2561
2562 static void
2563 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2564 {
2565         struct bond_dev_private *internals = dev->data->dev_private;
2566         int i;
2567
2568         for (i = 0; i < internals->slave_count; i++)
2569                 rte_eth_stats_reset(internals->slaves[i].port_id);
2570 }
2571
2572 static void
2573 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2574 {
2575         struct bond_dev_private *internals = eth_dev->data->dev_private;
2576         int i;
2577
2578         internals->promiscuous_en = 1;
2579
2580         switch (internals->mode) {
2581         /* Promiscuous mode is propagated to all slaves */
2582         case BONDING_MODE_ROUND_ROBIN:
2583         case BONDING_MODE_BALANCE:
2584         case BONDING_MODE_BROADCAST:
2585                 for (i = 0; i < internals->slave_count; i++)
2586                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2587                 break;
2588         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2589         case BONDING_MODE_8023AD:
2590                 break;
2591         /* Promiscuous mode is propagated only to primary slave */
2592         case BONDING_MODE_ACTIVE_BACKUP:
2593         case BONDING_MODE_TLB:
2594         case BONDING_MODE_ALB:
2595         default:
2596                 rte_eth_promiscuous_enable(internals->current_primary_port);
2597         }
2598 }
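
/*
 * Mode 4 is skipped above because the 802.3ad code manages slave
 * promiscuous state itself: slaves are put into promiscuous mode when
 * activated so they can receive LACPDUs addressed to the slow-protocols
 * multicast address (see bond_mode_8023ad_activate_slave() in
 * rte_eth_bond_8023ad.c).
 */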
2599
2600 static void
2601 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2602 {
2603         struct bond_dev_private *internals = dev->data->dev_private;
2604         int i;
2605
2606         internals->promiscuous_en = 0;
2607
2608         switch (internals->mode) {
2609         /* Promiscuous mode is propagated to all slaves */
2610         case BONDING_MODE_ROUND_ROBIN:
2611         case BONDING_MODE_BALANCE:
2612         case BONDING_MODE_BROADCAST:
2613                 for (i = 0; i < internals->slave_count; i++)
2614                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2615                 break;
2616         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2617         case BONDING_MODE_8023AD:
2618                 break;
2619         /* Promiscuous mode is propagated only to primary slave */
2620         case BONDING_MODE_ACTIVE_BACKUP:
2621         case BONDING_MODE_TLB:
2622         case BONDING_MODE_ALB:
2623         default:
2624                 rte_eth_promiscuous_disable(internals->current_primary_port);
2625         }
2626 }
2627
2628 static void
2629 bond_ethdev_delayed_lsc_propagation(void *arg)
2630 {
2631         if (arg == NULL)
2632                 return;
2633
2634         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2635                         RTE_ETH_EVENT_INTR_LSC, NULL);
2636 }
2637
2638 int
2639 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2640                 void *param, void *ret_param __rte_unused)
2641 {
2642         struct rte_eth_dev *bonded_eth_dev;
2643         struct bond_dev_private *internals;
2644         struct rte_eth_link link;
2645         int rc = -1;
2646
2647         int i, valid_slave = 0;
2648         uint16_t active_pos;
2649         uint8_t lsc_flag = 0;
2650
2651         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2652                 return rc;
2653
2654         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2655
2656         if (check_for_bonded_ethdev(bonded_eth_dev))
2657                 return rc;
2658
2659         internals = bonded_eth_dev->data->dev_private;
2660
2661         /* If the device isn't started don't handle interrupts */
2662         if (!bonded_eth_dev->data->dev_started)
2663                 return rc;
2664
2665         /* verify that port_id is a valid slave of bonded port */
2666         for (i = 0; i < internals->slave_count; i++) {
2667                 if (internals->slaves[i].port_id == port_id) {
2668                         valid_slave = 1;
2669                         break;
2670                 }
2671         }
2672
2673         if (!valid_slave)
2674                 return rc;
2675
2676         /* Synchronize parallel invocations of the LSC callback, whether by
2677          * a real link event from the slave PMDs or by the bonding PMD itself.
2678          */
2679         rte_spinlock_lock(&internals->lsc_lock);
2680
2681         /* Search for port in active port list */
2682         active_pos = find_slave_by_id(internals->active_slaves,
2683                         internals->active_slave_count, port_id);
2684
2685         rte_eth_link_get_nowait(port_id, &link);
2686         if (link.link_status) {
2687                 if (active_pos < internals->active_slave_count)
2688                         goto link_update;
2689
2690                 /* If there are no active slave ports then make this the primary port */
2691                 if (internals->active_slave_count < 1) {
2692                         /* If first active slave, then change link status */
2693                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2694                         internals->current_primary_port = port_id;
2695                         lsc_flag = 1;
2696
2697                         mac_address_slaves_update(bonded_eth_dev);
2698                 }
2699
2700                 /* Check link state properties if the bonded link is up */
2701                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2702                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2703                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2704                                              "for slave %d in bonding mode %d",
2705                                              port_id, internals->mode);
2706                 } else {
2707                         /* inherit slave link properties */
2708                         link_properties_set(bonded_eth_dev, &link);
2709                 }
2710
2711                 activate_slave(bonded_eth_dev, port_id);
2712
2713                 /* If user has defined the primary port then default to using it */
2714                 if (internals->user_defined_primary_port &&
2715                                 internals->primary_port == port_id)
2716                         bond_ethdev_primary_set(internals, port_id);
2717         } else {
2718                 if (active_pos == internals->active_slave_count)
2719                         goto link_update;
2720
2721                 /* Remove from active slave list */
2722                 deactivate_slave(bonded_eth_dev, port_id);
2723
2724                 if (internals->active_slave_count < 1)
2725                         lsc_flag = 1;
2726
2727                 /* Update primary id: take the first active slave from the list,
2728                  * or fall back to the configured primary port if none is active */
2729                 if (port_id == internals->current_primary_port) {
2730                         if (internals->active_slave_count > 0)
2731                                 bond_ethdev_primary_set(internals,
2732                                                 internals->active_slaves[0]);
2733                         else
2734                                 internals->current_primary_port = internals->primary_port;
2735                 }
2736         }
2737
2738 link_update:
2739         /**
2740          * Update bonded device link properties after any change to active
2741          * slaves
2742          */
2743         bond_ethdev_link_update(bonded_eth_dev, 0);
2744
2745         if (lsc_flag) {
2746                 /* Cancel any outstanding delayed LSC propagation alarms if delays are enabled */
2747                 if (internals->link_up_delay_ms > 0 ||
2748                         internals->link_down_delay_ms > 0)
2749                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2750                                         bonded_eth_dev);
2751
2752                 if (bonded_eth_dev->data->dev_link.link_status) {
2753                         if (internals->link_up_delay_ms > 0)
2754                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2755                                                 bond_ethdev_delayed_lsc_propagation,
2756                                                 (void *)bonded_eth_dev);
2757                         else
2758                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2759                                                 RTE_ETH_EVENT_INTR_LSC,
2760                                                 NULL);
2761
2762                 } else {
2763                         if (internals->link_down_delay_ms > 0)
2764                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2765                                                 bond_ethdev_delayed_lsc_propagation,
2766                                                 (void *)bonded_eth_dev);
2767                         else
2768                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2769                                                 RTE_ETH_EVENT_INTR_LSC,
2770                                                 NULL);
2771                 }
2772         }
2773
2774         rte_spinlock_unlock(&internals->lsc_lock);
2775
2776         return rc;
2777 }
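
/*
 * Illustrative sketch, not part of the driver: roughly how the callback
 * above gets attached to a slave port. The bonding PMD registers it when
 * a slave is added (see rte_eth_bond_api.c); the exact call site there
 * may differ from this sketch, and the helper name is hypothetical.
 */
static __rte_unused int
example_register_lsc_cb(uint16_t slave_port_id, uint16_t *bonded_port_id)
{
        /* param must point at the bonded port id read back by the callback */
        return rte_eth_dev_callback_register(slave_port_id,
                        RTE_ETH_EVENT_INTR_LSC,
                        bond_ethdev_lsc_event_callback,
                        bonded_port_id);
}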
2778
2779 static int
2780 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2781                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2782 {
2783         unsigned i, j;
2784         int result = 0;
2785         int slave_reta_size;
2786         unsigned reta_count;
2787         struct bond_dev_private *internals = dev->data->dev_private;
2788
2789         if (reta_size != internals->reta_size)
2790                 return -EINVAL;
2791
2792          /* Copy RETA table */
2793         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2794
2795         for (i = 0; i < reta_count; i++) {
2796                 internals->reta_conf[i].mask = reta_conf[i].mask;
2797                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2798                         if ((reta_conf[i].mask >> j) & 0x01)
2799                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2800         }
2801
2802         /* Fill rest of array */
2803         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2804                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2805                                 sizeof(internals->reta_conf[0]) * reta_count);
2806
2807         /* Propagate RETA over slaves */
2808         for (i = 0; i < internals->slave_count; i++) {
2809                 slave_reta_size = internals->slaves[i].reta_size;
2810                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2811                                 &internals->reta_conf[0], slave_reta_size);
2812                 if (result < 0)
2813                         return result;
2814         }
2815
2816         return 0;
2817 }
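
/*
 * Illustrative sketch, not part of the driver: how an application might
 * build a RETA update for the bonded port, spreading entries across its
 * Rx queues round-robin. The helper name is hypothetical; reta_size must
 * match what the device reports in struct rte_eth_dev_info.
 */
static __rte_unused int
example_spread_reta(uint16_t bonded_port_id, uint16_t reta_size,
                uint16_t nb_rx_queues)
{
        struct rte_eth_rss_reta_entry64
                reta_conf[ETH_RSS_RETA_SIZE_512 / RTE_RETA_GROUP_SIZE];
        uint16_t i;

        if (reta_size > ETH_RSS_RETA_SIZE_512 || nb_rx_queues == 0)
                return -EINVAL;

        for (i = 0; i < reta_size; i++) {
                /* Select every entry in the group and assign queues
                 * round-robin */
                reta_conf[i / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
                reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
                                i % nb_rx_queues;
        }

        return rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, reta_size);
}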
2818
2819 static int
2820 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2821                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2822 {
2823         int i, j;
2824         struct bond_dev_private *internals = dev->data->dev_private;
2825
2826         if (reta_size != internals->reta_size)
2827                 return -EINVAL;
2828
2829          /* Copy RETA table */
2830         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2831                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2832                         if ((reta_conf[i].mask >> j) & 0x01)
2833                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2834
2835         return 0;
2836 }
2837
2838 static int
2839 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2840                 struct rte_eth_rss_conf *rss_conf)
2841 {
2842         int i, result = 0;
2843         struct bond_dev_private *internals = dev->data->dev_private;
2844         struct rte_eth_rss_conf bond_rss_conf;
2845
2846         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2847
2848         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2849
2850         if (bond_rss_conf.rss_hf != 0)
2851                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2852
2853         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2854                         sizeof(internals->rss_key)) {
2855                 if (bond_rss_conf.rss_key_len == 0)
2856                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2857                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2858                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2859                                 internals->rss_key_len);
2860         }
2861
2862         for (i = 0; i < internals->slave_count; i++) {
2863                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2864                                 &bond_rss_conf);
2865                 if (result < 0)
2866                         return result;
2867         }
2868
2869         return 0;
2870 }
2871
2872 static int
2873 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2874                 struct rte_eth_rss_conf *rss_conf)
2875 {
2876         struct bond_dev_private *internals = dev->data->dev_private;
2877
2878         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2879         rss_conf->rss_key_len = internals->rss_key_len;
2880         if (rss_conf->rss_key)
2881                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2882
2883         return 0;
2884 }
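
/*
 * Illustrative sketch, not part of the driver: a read-modify-write of the
 * RSS hash configuration through the two handlers above. The helper name
 * is hypothetical; the key buffer is sized generously for this PMD's key.
 */
static __rte_unused int
example_restrict_rss_to_ip(uint16_t bonded_port_id)
{
        uint8_t key[64];
        struct rte_eth_rss_conf conf = {
                .rss_key = key,
                .rss_key_len = sizeof(key),
        };
        int ret;

        ret = rte_eth_dev_rss_hash_conf_get(bonded_port_id, &conf);
        if (ret < 0)
                return ret;

        /* Keep only the IP hash types from the current configuration */
        conf.rss_hf &= ETH_RSS_IP;

        return rte_eth_dev_rss_hash_update(bonded_port_id, &conf);
}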
2885
2886 static int
2887 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2888 {
2889         struct rte_eth_dev *slave_eth_dev;
2890         struct bond_dev_private *internals = dev->data->dev_private;
2891         int ret, i;
2892
2893         rte_spinlock_lock(&internals->lock);
2894
2895         for (i = 0; i < internals->slave_count; i++) {
2896                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2897                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2898                         rte_spinlock_unlock(&internals->lock);
2899                         return -ENOTSUP;
2900                 }
2901         }
2902         for (i = 0; i < internals->slave_count; i++) {
2903                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2904                 if (ret < 0) {
2905                         rte_spinlock_unlock(&internals->lock);
2906                         return ret;
2907                 }
2908         }
2909
2910         rte_spinlock_unlock(&internals->lock);
2911         return 0;
2912 }
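
/*
 * Illustrative sketch, not part of the driver: setting the MTU on the
 * bonded port goes through the handler above. The helper name and the
 * jumbo MTU value are hypothetical.
 */
static __rte_unused int
example_set_bond_jumbo_mtu(uint16_t bonded_port_id)
{
        uint16_t mtu;
        int ret;

        /* The handler applies the new MTU to every slave, and refuses the
         * whole operation if any slave lacks mtu_set support. */
        ret = rte_eth_dev_set_mtu(bonded_port_id, 9000);
        if (ret < 0)
                return ret;

        /* Read back the value recorded by the ethdev layer */
        return rte_eth_dev_get_mtu(bonded_port_id, &mtu);
}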
2913
2914 static int
2915 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2916 {
2917         if (mac_address_set(dev, addr)) {
2918                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2919                 return -EINVAL;
2920         }
2921
2922         return 0;
2923 }
2924
2925 static int
2926 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2927                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2928 {
2929         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2930                 *(const void **)arg = &bond_flow_ops;
2931                 return 0;
2932         }
2933         return -ENOTSUP;
2934 }
2935
2936 static int
2937 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2938                                 __rte_unused uint32_t index, uint32_t vmdq)
2939 {
2940         struct rte_eth_dev *slave_eth_dev;
2941         struct bond_dev_private *internals = dev->data->dev_private;
2942         int ret, i;
2943
2944         rte_spinlock_lock(&internals->lock);
2945
2946         for (i = 0; i < internals->slave_count; i++) {
2947                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2948                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2949                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2950                         ret = -ENOTSUP;
2951                         goto end;
2952                 }
2953         }
2954
2955         for (i = 0; i < internals->slave_count; i++) {
2956                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2957                                 mac_addr, vmdq);
2958                 if (ret < 0) {
2959                         /* rollback */
2960                         for (i--; i >= 0; i--)
2961                                 rte_eth_dev_mac_addr_remove(
2962                                         internals->slaves[i].port_id, mac_addr);
2963                         goto end;
2964                 }
2965         }
2966
2967         ret = 0;
2968 end:
2969         rte_spinlock_unlock(&internals->lock);
2970         return ret;
2971 }
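
/*
 * Illustrative sketch, not part of the driver: adding a secondary MAC
 * address to the bonded port uses the all-or-nothing handler above. The
 * helper name and the locally administered address are hypothetical.
 */
static __rte_unused int
example_add_secondary_mac(uint16_t bonded_port_id)
{
        struct ether_addr addr = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
        };

        /* VMDq pool 0; the handler rolls back slaves on partial failure */
        return rte_eth_dev_mac_addr_add(bonded_port_id, &addr, 0);
}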
2972
2973 static void
2974 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2975 {
2976         struct rte_eth_dev *slave_eth_dev;
2977         struct bond_dev_private *internals = dev->data->dev_private;
2978         int i;
2979
2980         rte_spinlock_lock(&internals->lock);
2981
2982         for (i = 0; i < internals->slave_count; i++) {
2983                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2984                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2985                         goto end;
2986         }
2987
2988         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2989
2990         for (i = 0; i < internals->slave_count; i++)
2991                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2992                                 mac_addr);
2993
2994 end:
2995         rte_spinlock_unlock(&internals->lock);
2996 }
2997
2998 const struct eth_dev_ops default_dev_ops = {
2999         .dev_start            = bond_ethdev_start,
3000         .dev_stop             = bond_ethdev_stop,
3001         .dev_close            = bond_ethdev_close,
3002         .dev_configure        = bond_ethdev_configure,
3003         .dev_infos_get        = bond_ethdev_info,
3004         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3005         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3006         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3007         .rx_queue_release     = bond_ethdev_rx_queue_release,
3008         .tx_queue_release     = bond_ethdev_tx_queue_release,
3009         .link_update          = bond_ethdev_link_update,
3010         .stats_get            = bond_ethdev_stats_get,
3011         .stats_reset          = bond_ethdev_stats_reset,
3012         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3013         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3014         .reta_update          = bond_ethdev_rss_reta_update,
3015         .reta_query           = bond_ethdev_rss_reta_query,
3016         .rss_hash_update      = bond_ethdev_rss_hash_update,
3017         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3018         .mtu_set              = bond_ethdev_mtu_set,
3019         .mac_addr_set         = bond_ethdev_mac_address_set,
3020         .mac_addr_add         = bond_ethdev_mac_addr_add,
3021         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3022         .filter_ctrl          = bond_filter_ctrl
3023 };
3024
3025 static int
3026 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3027 {
3028         const char *name = rte_vdev_device_name(dev);
3029         uint8_t socket_id = dev->device.numa_node;
3030         struct bond_dev_private *internals = NULL;
3031         struct rte_eth_dev *eth_dev = NULL;
3032         uint32_t vlan_filter_bmp_size;
3033
3034         /* Now do all data allocation: the eth_dev structure, dummy pci driver
3035          * and internal (private) data.
3036          */
3037
3038         /* reserve an ethdev entry */
3039         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3040         if (eth_dev == NULL) {
3041                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3042                 goto err;
3043         }
3044
3045         internals = eth_dev->data->dev_private;
3046         eth_dev->data->nb_rx_queues = (uint16_t)1;
3047         eth_dev->data->nb_tx_queues = (uint16_t)1;
3048
3049         /* Allocate memory for storing MAC addresses */
3050         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3051                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3052         if (eth_dev->data->mac_addrs == NULL) {
3053                 RTE_BOND_LOG(ERR,
3054                              "Failed to allocate %u bytes needed to store MAC addresses",
3055                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3056                 goto err;
3057         }
3058
3059         eth_dev->dev_ops = &default_dev_ops;
3060         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3061
3062         rte_spinlock_init(&internals->lock);
3063         rte_spinlock_init(&internals->lsc_lock);
3064
3065         internals->port_id = eth_dev->data->port_id;
3066         internals->mode = BONDING_MODE_INVALID;
3067         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3068         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3069         internals->burst_xmit_hash = burst_xmit_l2_hash;
3070         internals->user_defined_mac = 0;
3071
3072         internals->link_status_polling_enabled = 0;
3073
3074         internals->link_status_polling_interval_ms =
3075                 DEFAULT_POLLING_INTERVAL_10_MS;
3076         internals->link_down_delay_ms = 0;
3077         internals->link_up_delay_ms = 0;
3078
3079         internals->slave_count = 0;
3080         internals->active_slave_count = 0;
3081         internals->rx_offload_capa = 0;
3082         internals->tx_offload_capa = 0;
3083         internals->rx_queue_offload_capa = 0;
3084         internals->tx_queue_offload_capa = 0;
3085         internals->candidate_max_rx_pktlen = 0;
3086         internals->max_rx_pktlen = 0;
3087
3088         /* Initially allow any RSS offload type to be chosen */
3089         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3090
3091         memset(&internals->default_rxconf, 0,
3092                sizeof(internals->default_rxconf));
3093         memset(&internals->default_txconf, 0,
3094                sizeof(internals->default_txconf));
3095
3096         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3097         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3098
3099         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3100         memset(internals->slaves, 0, sizeof(internals->slaves));
3101
3102         TAILQ_INIT(&internals->flow_list);
3103         internals->flow_isolated_valid = 0;
3104
3105         /* Set mode 4 default configuration */
3106         bond_mode_8023ad_setup(eth_dev, NULL);
3107         if (bond_ethdev_mode_set(eth_dev, mode)) {
3108                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3109                                  eth_dev->data->port_id, mode);
3110                 goto err;
3111         }
3112
3113         vlan_filter_bmp_size =
3114                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3115         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3116                                                    RTE_CACHE_LINE_SIZE);
3117         if (internals->vlan_filter_bmpmem == NULL) {
3118                 RTE_BOND_LOG(ERR,
3119                              "Failed to allocate vlan bitmap for bonded device %u",
3120                              eth_dev->data->port_id);
3121                 goto err;
3122         }
3123
3124         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3125                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3126         if (internals->vlan_filter_bmp == NULL) {
3127                 RTE_BOND_LOG(ERR,
3128                              "Failed to init vlan bitmap for bonded device %u",
3129                              eth_dev->data->port_id);
3130                 rte_free(internals->vlan_filter_bmpmem);
3131                 goto err;
3132         }
3133
3134         return eth_dev->data->port_id;
3135
3136 err:
3137         rte_free(internals);
3138         if (eth_dev != NULL)
3139                 eth_dev->data->dev_private = NULL;
3140         rte_eth_dev_release_port(eth_dev);
3141         return -1;
3142 }
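
/*
 * Illustrative sketch, not part of the driver: the public API route into
 * the allocation above. rte_eth_bond_create() probes this vdev driver,
 * which ends up in bond_alloc(). The device name and the slave port ids
 * 0 and 1 are hypothetical.
 */
static __rte_unused int
example_create_bond_via_api(void)
{
        int bond_port;

        bond_port = rte_eth_bond_create("net_bonding_example",
                        BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
        if (bond_port < 0)
                return bond_port;

        if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
                        rte_eth_bond_slave_add(bond_port, 1) != 0)
                return -1;

        return bond_port;
}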
3143
3144 static int
3145 bond_probe(struct rte_vdev_device *dev)
3146 {
3147         const char *name;
3148         struct bond_dev_private *internals;
3149         struct rte_kvargs *kvlist;
3150         uint8_t bonding_mode, socket_id;
3151         int  arg_count, port_id;
3152         uint8_t agg_mode;
3153         struct rte_eth_dev *eth_dev;
3154
3155         if (!dev)
3156                 return -EINVAL;
3157
3158         name = rte_vdev_device_name(dev);
3159         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3160
3161         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3162                 eth_dev = rte_eth_dev_attach_secondary(name);
3163                 if (!eth_dev) {
3164                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3165                         return -1;
3166                 }
3167                 /* TODO: request info from primary to set up Rx and Tx */
3168                 eth_dev->dev_ops = &default_dev_ops;
3169                 eth_dev->device = &dev->device;
3170                 rte_eth_dev_probing_finish(eth_dev);
3171                 return 0;
3172         }
3173
3174         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3175                 pmd_bond_init_valid_arguments);
3176         if (kvlist == NULL)
3177                 return -1;
3178
3179         /* Parse link bonding mode */
3180         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3181                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3182                                 &bond_ethdev_parse_slave_mode_kvarg,
3183                                 &bonding_mode) != 0) {
3184                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3185                                         name);
3186                         goto parse_error;
3187                 }
3188         } else {
3189                 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3190                                 "device %s", name);
3191                 goto parse_error;
3192         }
3193
3194         /* Parse socket id to create bonding device on */
3195         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3196         if (arg_count == 1) {
3197                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3198                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3199                                 != 0) {
3200                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3201                                         "bonded device %s", name);
3202                         goto parse_error;
3203                 }
3204         } else if (arg_count > 1) {
3205                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3206                                 "bonded device %s", name);
3207                 goto parse_error;
3208         } else {
3209                 socket_id = rte_socket_id();
3210         }
3211
3212         dev->device.numa_node = socket_id;
3213
3214         /* Create link bonding eth device */
3215         port_id = bond_alloc(dev, bonding_mode);
3216         if (port_id < 0) {
3217                 RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3218                                 "socket %u.",   name, bonding_mode, socket_id);
3219                 goto parse_error;
3220         }
3221         internals = rte_eth_devices[port_id].data->dev_private;
3222         internals->kvlist = kvlist;
3223
3224         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3225                 if (rte_kvargs_process(kvlist,
3226                                 PMD_BOND_AGG_MODE_KVARG,
3227                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3228                                 &agg_mode) != 0) {
3229                         RTE_BOND_LOG(ERR,
3230                                         "Failed to parse agg selection mode for bonded device %s",
3231                                         name);
3232                         goto parse_error;
3233                 }
3234
3235                 if (internals->mode == BONDING_MODE_8023AD)
3236                         internals->mode4.agg_selection = agg_mode;
3237         } else {
3238                 internals->mode4.agg_selection = AGG_STABLE;
3239         }
3240
3241         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3242         RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3243                         "socket %u.",   name, port_id, bonding_mode, socket_id);
3244         return 0;
3245
3246 parse_error:
3247         rte_kvargs_free(kvlist);
3248
3249         return -1;
3250 }
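
/*
 * Illustrative sketch, not part of the driver: the probe above can also
 * be reached at runtime through the vdev bus. The device name, PCI
 * addresses and kvargs string are hypothetical, but the keys match the
 * ones parsed above.
 */
static __rte_unused int
example_hotplug_bond(void)
{
        /* Triggers bond_probe() for the newly declared vdev */
        return rte_vdev_init("net_bonding_hp0",
                        "mode=0,slave=0000:02:00.0,slave=0000:02:00.1");
}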
3251
3252 static int
3253 bond_remove(struct rte_vdev_device *dev)
3254 {
3255         struct rte_eth_dev *eth_dev;
3256         struct bond_dev_private *internals;
3257         const char *name;
3258
3259         if (!dev)
3260                 return -EINVAL;
3261
3262         name = rte_vdev_device_name(dev);
3263         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3264
3265         /* Now free all allocated data: the eth_dev structure,
3266          * dummy pci driver and internal (private) data.
3267          */
3268
3269         /* find an ethdev entry */
3270         eth_dev = rte_eth_dev_allocated(name);
3271         if (eth_dev == NULL)
3272                 return -ENODEV;
3273
3274         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3275                 return rte_eth_dev_release_port(eth_dev);
3276
3277         RTE_ASSERT(eth_dev->device == &dev->device);
3278
3279         internals = eth_dev->data->dev_private;
3280         if (internals->slave_count != 0)
3281                 return -EBUSY;
3282
3283         if (eth_dev->data->dev_started == 1) {
3284                 bond_ethdev_stop(eth_dev);
3285                 bond_ethdev_close(eth_dev);
3286         }
3287
3288         eth_dev->dev_ops = NULL;
3289         eth_dev->rx_pkt_burst = NULL;
3290         eth_dev->tx_pkt_burst = NULL;
3291
3293         /* Try to release the mempool used in mode 6. If the bonded
3294          * device is not in mode 6, freeing the NULL pointer is not a problem.
3295          */
3296         rte_mempool_free(internals->mode6.mempool);
3297         rte_bitmap_free(internals->vlan_filter_bmp);
3298         rte_free(internals->vlan_filter_bmpmem);
3299
3300         rte_eth_dev_release_port(eth_dev);
3301
3302         return 0;
3303 }
3304
3305 /* This part resolves the slave port ids after all the other pdevs and vdevs
3306  * have been allocated. */
3307 static int
3308 bond_ethdev_configure(struct rte_eth_dev *dev)
3309 {
3310         const char *name = dev->device->name;
3311         struct bond_dev_private *internals = dev->data->dev_private;
3312         struct rte_kvargs *kvlist = internals->kvlist;
3313         int arg_count;
3314         uint16_t port_id = dev - rte_eth_devices;
3315         uint8_t agg_mode;
3316
3317         static const uint8_t default_rss_key[40] = {
3318                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3319                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3320                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3321                 0xBE, 0xAC, 0x01, 0xFA
3322         };
3323
3324         unsigned i, j;
3325
3326         /*
3327          * If RSS is enabled, fill table with default values and
3328          * set the key to the value specified in the port RSS configuration.
3329          * Fall back to the default RSS key if no key is specified.
3330          */
3331         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3332                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3333                         internals->rss_key_len =
3334                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3335                         memcpy(internals->rss_key,
3336                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3337                                internals->rss_key_len);
3338                 } else {
3339                         internals->rss_key_len = sizeof(default_rss_key);
3340                         memcpy(internals->rss_key, default_rss_key,
3341                                internals->rss_key_len);
3342                 }
3343
3344                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3345                         internals->reta_conf[i].mask = ~0LL;
3346                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3347                                 internals->reta_conf[i].reta[j] =
3348                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3349                                                 dev->data->nb_rx_queues;
3350                 }
3351         }
3352
3353         /* set the max_rx_pktlen */
3354         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3355
3356         /*
3357          * If there is no kvlist, this bonded device has been created
3358          * through the bonding API.
3359          */
3360         if (!kvlist)
3361                 return 0;
3362
3363         /* Parse MAC address for bonded device */
3364         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3365         if (arg_count == 1) {
3366                 struct ether_addr bond_mac;
3367
3368                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3369                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3370                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3371                                      name);
3372                         return -1;
3373                 }
3374
3375                 /* Set MAC address */
3376                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3377                         RTE_BOND_LOG(ERR,
3378                                      "Failed to set mac address on bonded device %s",
3379                                      name);
3380                         return -1;
3381                 }
3382         } else if (arg_count > 1) {
3383                 RTE_BOND_LOG(ERR,
3384                              "MAC address can be specified only once for bonded device %s",
3385                              name);
3386                 return -1;
3387         }
3388
3389         /* Parse/set balance mode transmit policy */
3390         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3391         if (arg_count == 1) {
3392                 uint8_t xmit_policy;
3393
3394                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3395                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3396                     0) {
3397                         RTE_BOND_LOG(INFO,
3398                                      "Invalid xmit policy specified for bonded device %s",
3399                                      name);
3400                         return -1;
3401                 }
3402
3403                 /* Set balance mode transmit policy */
3404                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3405                         RTE_BOND_LOG(ERR,
3406                                      "Failed to set balance xmit policy on bonded device %s",
3407                                      name);
3408                         return -1;
3409                 }
3410         } else if (arg_count > 1) {
3411                 RTE_BOND_LOG(ERR,
3412                              "Transmit policy can be specified only once for bonded device %s",
3413                              name);
3414                 return -1;
3415         }
3416
3417         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3418                 if (rte_kvargs_process(kvlist,
3419                                        PMD_BOND_AGG_MODE_KVARG,
3420                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3421                                        &agg_mode) != 0) {
3422                         RTE_BOND_LOG(ERR,
3423                                      "Failed to parse agg selection mode for bonded device %s",
3424                                      name);
3425                 }
3426                 if (internals->mode == BONDING_MODE_8023AD)
3427                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3428                                                               agg_mode);
3429         }
3430
3431         /* Parse/add slave ports to bonded device */
3432         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3433                 struct bond_ethdev_slave_ports slave_ports;
3434                 unsigned i;
3435
3436                 memset(&slave_ports, 0, sizeof(slave_ports));
3437
3438                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3439                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3440                         RTE_BOND_LOG(ERR,
3441                                      "Failed to parse slave ports for bonded device %s",
3442                                      name);
3443                         return -1;
3444                 }
3445
3446                 for (i = 0; i < slave_ports.slave_count; i++) {
3447                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3448                                 RTE_BOND_LOG(ERR,
3449                                              "Failed to add port %d as slave to bonded device %s",
3450                                              slave_ports.slaves[i], name);
3451                         }
3452                 }
3453
3454         } else {
3455                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3456                 return -1;
3457         }
3458
3459         /* Parse/set primary slave port id */
3460         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3461         if (arg_count == 1) {
3462                 uint16_t primary_slave_port_id;
3463
3464                 if (rte_kvargs_process(kvlist,
3465                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3466                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3467                                        &primary_slave_port_id) < 0) {
3468                         RTE_BOND_LOG(INFO,
3469                                      "Invalid primary slave port id specified for bonded device %s",
3470                                      name);
3471                         return -1;
3472                 }
3473
3474                 /* Set the primary slave port id */
3475                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3476                     != 0) {
3477                         RTE_BOND_LOG(ERR,
3478                                      "Failed to set primary slave port %d on bonded device %s",
3479                                      primary_slave_port_id, name);
3480                         return -1;
3481                 }
3482         } else if (arg_count > 1) {
3483                 RTE_BOND_LOG(INFO,
3484                              "Primary slave can be specified only once for bonded device %s",
3485                              name);
3486                 return -1;
3487         }
3488
3489         /* Parse link status monitor polling interval */
3490         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3491         if (arg_count == 1) {
3492                 uint32_t lsc_poll_interval_ms;
3493
3494                 if (rte_kvargs_process(kvlist,
3495                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3496                                        &bond_ethdev_parse_time_ms_kvarg,
3497                                        &lsc_poll_interval_ms) < 0) {
3498                         RTE_BOND_LOG(INFO,
3499                                      "Invalid lsc polling interval value specified for bonded"
3500                                      " device %s", name);
3501                         return -1;
3502                 }
3503
3504                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3505                     != 0) {
3506                         RTE_BOND_LOG(ERR,
3507                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3508                                      lsc_poll_interval_ms, name);
3509                         return -1;
3510                 }
3511         } else if (arg_count > 1) {
3512                 RTE_BOND_LOG(INFO,
3513                              "LSC polling interval can be specified only once for bonded"
3514                              " device %s", name);
3515                 return -1;
3516         }
3517
3518         /* Parse link up interrupt propagation delay */
3519         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3520         if (arg_count == 1) {
3521                 uint32_t link_up_delay_ms;
3522
3523                 if (rte_kvargs_process(kvlist,
3524                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3525                                        &bond_ethdev_parse_time_ms_kvarg,
3526                                        &link_up_delay_ms) < 0) {
3527                         RTE_BOND_LOG(INFO,
3528                                      "Invalid link up propagation delay value specified for"
3529                                      " bonded device %s", name);
3530                         return -1;
3531                 }
3532
3533                 /* Set the link up propagation delay */
3534                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3535                     != 0) {
3536                         RTE_BOND_LOG(ERR,
3537                                      "Failed to set link up propagation delay (%u ms) on bonded"
3538                                      " device %s", link_up_delay_ms, name);
3539                         return -1;
3540                 }
3541         } else if (arg_count > 1) {
3542                 RTE_BOND_LOG(INFO,
3543                              "Link up propagation delay can be specified only once for"
3544                              " bonded device %s", name);
3545                 return -1;
3546         }
3547
3548         /* Parse link down interrupt propagation delay */
3549         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3550         if (arg_count == 1) {
3551                 uint32_t link_down_delay_ms;
3552
3553                 if (rte_kvargs_process(kvlist,
3554                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3555                                        &bond_ethdev_parse_time_ms_kvarg,
3556                                        &link_down_delay_ms) < 0) {
3557                         RTE_BOND_LOG(INFO,
3558                                      "Invalid link down propagation delay value specified for"
3559                                      " bonded device %s", name);
3560                         return -1;
3561                 }
3562
3563                 /* Set the link down propagation delay */
3564                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3565                     != 0) {
3566                         RTE_BOND_LOG(ERR,
3567                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3568                                      link_down_delay_ms, name);
3569                         return -1;
3570                 }
3571         } else if (arg_count > 1) {
3572                 RTE_BOND_LOG(INFO,
3573                              "Link down propagation delay can be specified only once for  bonded device %s",
3574                              name);
3575                 return -1;
3576         }
3577
3578         return 0;
3579 }
3580
3581 struct rte_vdev_driver pmd_bond_drv = {
3582         .probe = bond_probe,
3583         .remove = bond_remove,
3584 };
3585
3586 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3587 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3588
3589 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3590         "slave=<ifc> "
3591         "primary=<ifc> "
3592         "mode=[0-6] "
3593         "xmit_policy=[l2 | l23 | l34] "
3594         "agg_mode=[count | stable | bandwidth] "
3595         "socket_id=<int> "
3596         "mac=<mac addr> "
3597         "lsc_poll_period_ms=<int> "
3598         "up_delay=<int> "
3599         "down_delay=<int>");
3600
3601 int bond_logtype;
3602
3603 RTE_INIT(bond_init_log)
3604 {
3605         bond_logtype = rte_log_register("pmd.net.bond");
3606         if (bond_logtype >= 0)
3607                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3608 }