/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

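/*
 * Skip over any VLAN tag(s) that follow the Ethernet header. Handles a
 * single 802.1Q tag as well as 802.1ad (QinQ) double tagging: *proto is
 * advanced to the encapsulated EtherType and the returned offset is the
 * number of tag bytes between the Ethernet header and the L3 header.
 */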
static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
        size_t vlan_offset = 0;

        if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
                rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

                vlan_offset = sizeof(struct vlan_hdr);
                *proto = vlan_hdr->eth_proto;

                if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
                        vlan_hdr = vlan_hdr + 1;
                        *proto = vlan_hdr->eth_proto;
                        vlan_offset += sizeof(struct vlan_hdr);
                }
        }
        return vlan_offset;
}

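/*
 * Mode 0 (round-robin) Rx: poll each active slave in turn, starting from
 * the slave recorded in internals->active_slave, until either the burst is
 * full or every slave has been polled once. The starting slave is rotated
 * between calls so no slave is systematically favoured.
 */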
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        uint16_t num_rx_total = 0;
        uint16_t slave_count;
        uint16_t active_slave;
        int i;

        /* Cast to structure containing the bonded device's port id and
         * queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        internals = bd_rx_q->dev_private;
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        if (active_slave >= slave_count) {
                internals->active_slave = 0;
                active_slave = 0;
        }

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Offset of pointer to *bufs increases as packets are
                 * received from other slaves */
                num_rx_slave =
                        rte_eth_rx_burst(internals->active_slaves[active_slave],
                                         bd_rx_q->queue_id,
                                         bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;
                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;
        return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;

        /* Cast to structure containing the bonded device's port id and
         * queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

        internals = bd_rx_q->dev_private;

        return rte_eth_rx_burst(internals->current_primary_port,
                        bd_rx_q->queue_id, bufs, nb_pkts);
}

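/*
 * Return true for untagged slow-protocol frames, i.e. frames whose
 * EtherType is ETHER_TYPE_SLOW and whose subtype is an LACPDU or a marker
 * PDU. VLAN-tagged frames are never treated as LACP traffic.
 */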
static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

        return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
                (ethertype == ether_type_slow_be &&
                (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

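/*
 * With dedicated queues enabled, a rte_flow rule on each slave steers
 * ETHER_TYPE_SLOW frames to a dedicated Rx queue, so the regular data-path
 * Rx burst functions never see LACP control traffic and can skip the
 * software filtering done in bond_ethdev_rx_burst_8023ad().
 */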
static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
        .dst.addr_bytes = { 0 },
        .src.addr_bytes = { 0 },
        .type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
        {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &flow_item_eth_type_8023ad,
                .last = NULL,
                .mask = &flow_item_eth_mask_type_8023ad,
        },
        {
                .type = RTE_FLOW_ITEM_TYPE_END,
                .spec = NULL,
                .last = NULL,
                .mask = NULL,
        }
};

const struct rte_flow_attr flow_attr_8023ad = {
        .group = 0,
        .priority = 0,
        .ingress = 1,
        .egress = 0,
        .reserved = 0,
};

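/*
 * Check that a slave can support the dedicated-queue flow rule: the rule
 * itself must validate on the slave, and the slave must have spare queue
 * capacity beyond what the bonded device already uses, since the dedicated
 * Rx/Tx queue pair is allocated on top of the bonded device's queues.
 */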
int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
                uint16_t slave_port) {
        struct rte_eth_dev_info slave_info;
        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        const struct rte_flow_action_queue lacp_queue_conf = {
                .index = 0,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
                        flow_item_8023ad, actions, &error);
        if (ret < 0) {
                RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
                                __func__, error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        rte_eth_dev_info_get(slave_port, &slave_info);
        if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
                        slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
                RTE_BOND_LOG(ERR,
                        "%s: Slave %d capabilities don't allow allocating additional queues",
                        __func__, slave_port);
                return -1;
        }

        return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
        struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);
        struct rte_eth_dev_info bond_info;
        uint16_t idx;

        /* Verify that all slaves in the bonding device support flow
         * director and have room for the dedicated queues */
        if (internals->slave_count > 0) {
                rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

                internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
                internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

                for (idx = 0; idx < internals->slave_count; idx++) {
                        if (bond_ethdev_8023ad_flow_verify(bond_dev,
                                        internals->slaves[idx].port_id) != 0)
                                return -1;
                }
        }

        return 0;
}

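/*
 * Install the slow-protocol flow rule on a slave, steering matching frames
 * to the dedicated Rx queue. The created rule handle is kept per slave in
 * internals->mode4.dedicated_queues.flow[] so it can be torn down later.
 */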
int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

        struct rte_flow_error error;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                        (bond_dev->data->dev_private);

        struct rte_flow_action_queue lacp_queue_conf = {
                .index = internals->mode4.dedicated_queues.rx_qid,
        };

        const struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &lacp_queue_conf
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                }
        };

        internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
                        &flow_attr_8023ad, flow_item_8023ad, actions, &error);
        if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
                RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
                                "(slave_port=%d queue_id=%d)",
                                error.message, slave_port,
                                internals->mode4.dedicated_queues.rx_qid);
                return -1;
        }

        return 0;
}

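/*
 * Mode 4 Rx with dedicated queues: LACP frames are filtered out in
 * hardware, so this is plain round-robin polling of the active slaves'
 * data queues with no software slow-protocol handling.
 */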
static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint16_t active_slave;
        uint16_t i;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        active_slave = internals->active_slave;
        if (active_slave >= slave_count) {
                internals->active_slave = 0;
                active_slave = 0;
        }
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        for (i = 0; i < slave_count && nb_pkts; i++) {
                uint16_t num_rx_slave;

                /* Read packets from this slave */
                num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
                                                bd_rx_q->queue_id,
                                                bufs + num_rx_total, nb_pkts);
                num_rx_total += num_rx_slave;
                nb_pkts -= num_rx_slave;

                if (++active_slave == slave_count)
                        active_slave = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

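/*
 * Mode 4 Tx with dedicated queues: hash each packet onto one of the slaves
 * that is currently in the DISTRIBUTING state and burst the per-slave
 * groups out. Packets that a slave fails to accept are moved to the tail
 * of bufs so the caller can retry them.
 */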
static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * indexed per packet, so it must be sized by the burst length */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (unlikely(dist_slave_count < 1))
                return 0;

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave using a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < dist_slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

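/*
 * Mode 4 Rx without dedicated queues: poll the active slaves round-robin
 * and filter the received burst in software. LACP/marker PDUs are handed
 * to the mode 4 state machine, and data packets are dropped when the slave
 * is not COLLECTING or, outside promiscuous mode, when they are unicast
 * frames not addressed to the bonded MAC.
 */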
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        /* Cast to structure containing the bonded device's port id and
         * queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct rte_eth_dev *bonded_eth_dev =
                                        &rte_eth_devices[internals->port_id];
        struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct ether_hdr *hdr;

        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count, idx;

        uint8_t collecting;  /* current slave collecting status */
        const uint8_t promisc = internals->promiscuous_en;
        uint16_t i, j, k;
        uint8_t subtype;

        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);

        idx = internals->active_slave;
        if (idx >= slave_count) {
                internals->active_slave = 0;
                idx = 0;
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
                collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);

                /* Read packets from this slave */
                num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
                                &bufs[num_rx_total], nb_pkts - num_rx_total);

                for (k = j; k < 2 && k < num_rx_total; k++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

                /* Handle slow protocol packets. */
                while (j < num_rx_total) {

                        /* If packet is not pure L2 and is known, skip it */
                        if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
                                j++;
                                continue;
                        }

                        if (j + 3 < num_rx_total)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

                        hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

                        /* Remove packet from array if it is a slow packet or the slave
                         * is not in collecting state, or the bonding interface is not in
                         * promiscuous mode and the packet address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
                                !collecting ||
                                (!promisc &&
                                 !is_multicast_ether_addr(&hdr->d_addr) &&
                                 !is_same_ether_addr(bond_mac,
                                                     &hdr->d_addr)))) {

                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
                                            internals, slaves[idx], bufs[j]);
                                } else
                                        rte_pktmbuf_free(bufs[j]);

                                /* Packet is managed by mode 4 or dropped, shift the array */
                                num_rx_total--;
                                if (j < num_rx_total) {
                                        memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
                                                (num_rx_total - j));
                                }
                        } else
                                j++;
                }
                if (unlikely(++idx == slave_count))
                        idx = 0;
        }

        if (++internals->active_slave >= slave_count)
                internals->active_slave = 0;

        return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf)
{
        switch (arp_op) {
        case ARP_OP_REQUEST:
                snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
                return;
        case ARP_OP_REPLY:
                snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
                return;
        case ARP_OP_REVREQUEST:
                snprintf(buf, sizeof("Reverse ARP Request"), "%s",
                                "Reverse ARP Request");
                return;
        case ARP_OP_REVREPLY:
                snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
                                "Reverse ARP Reply");
                return;
        case ARP_OP_INVREQUEST:
                snprintf(buf, sizeof("Peer Identify Request"), "%s",
                                "Peer Identify Request");
                return;
        case ARP_OP_INVREPLY:
                snprintf(buf, sizeof("Peer Identify Reply"), "%s",
                                "Peer Identify Reply");
                return;
        default:
                break;
        }
        snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
}
#endif
#define MaxIPv4String   16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
        uint32_t ipv4_addr;

        ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
        snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
                (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
                ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER      128
uint8_t active_clients;
struct client_stats_t {
        uint16_t port;
        uint32_t ipv4_addr;
        uint32_t ipv4_rx_packets;
        uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

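/*
 * Update the per-client debug counters. Clients are identified by
 * (IPv4 address, slave port) using a linear scan of the table; whether the
 * Rx or the Tx counter is bumped is decided by comparing the passed
 * counter pointer against &burstnumberRX.
 */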
static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
        int i = 0;

        for (; i < MAX_CLIENTS_NUMBER; i++) {
                if ((client_stats[i].ipv4_addr == addr) &&
                                (client_stats[i].port == port)) {
                        /* Just update the packet count for this client */
                        if (TXorRXindicator == &burstnumberRX)
                                client_stats[i].ipv4_rx_packets++;
                        else
                                client_stats[i].ipv4_tx_packets++;
                        return;
                }
        }
        /* A new client: insert it into the table and start its counters,
         * provided there is still room. */
        if (active_clients >= MAX_CLIENTS_NUMBER)
                return;
        if (TXorRXindicator == &burstnumberRX)
                client_stats[active_clients].ipv4_rx_packets++;
        else
                client_stats[active_clients].ipv4_tx_packets++;
        client_stats[active_clients].ipv4_addr = addr;
        client_stats[active_clients].port = port;
        active_clients++;
}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
        rte_log(RTE_LOG_DEBUG, bond_logtype,                            \
                "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
                "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
                info,                                                   \
                port,                                                   \
                eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
                eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
                eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
                src_ip,                                                 \
                eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
                eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
                eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
                dst_ip,                                                 \
                arp_op, ++burstnumber)
#endif

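/*
 * Feed the per-client statistics table for IPv4 and ARP packets seen on
 * the ALB (mode 6) path. Compiled in only for the ALB debug builds; full
 * per-packet logging additionally requires RTE_LIBRTE_BOND_DEBUG_ALB.
 */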
static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
                uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
        struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        struct arp_hdr *arp_h;
        char dst_ip[16];
        char ArpOp[24];
        char buf[16];
#endif
        char src_ip[16];

        uint16_t ether_type = eth_h->ether_type;
        uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        strlcpy(buf, info, 16);
#endif

        if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
                ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
                ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
                update_client_stats(ipv4_h->src_addr, port, burstnumber);
        }
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
        else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
                ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
                ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
                arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
                MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
        }
#endif
}
#endif

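/*
 * Mode 6 (adaptive load balancing) Rx: receive as in round-robin mode,
 * then hand any ARP packets in the burst to the ALB ARP logic so the
 * client table stays in sync with what peers believe our MAC to be.
 */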
static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;
        uint16_t nb_recv_pkts;
        int i;

        nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

        for (i = 0; i < nb_recv_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
                        bond_mode_alb_arp_recv(eth_h, offset, internals);
                }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
                        mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
        }

        return nb_recv_pkts;
}

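/*
 * Mode 0 (round-robin) Tx: deal packets across the active slaves in
 * rotation. Note that slave_idx is function-static, so the rotation
 * position is shared by every bonded device and Tx queue in the process.
 */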
static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
        uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

        uint16_t num_of_slaves;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        uint16_t num_tx_total = 0, num_tx_slave;

        static int slave_idx = 0;
        int i, cslave_idx = 0, tx_fail_total = 0;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        num_of_slaves = internals->active_slave_count;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * num_of_slaves);

        if (num_of_slaves < 1)
                return num_tx_total;

        /* Populate the per-slave mbuf arrays with the packets to send on
         * each slave */
        for (i = 0; i < nb_pkts; i++) {
                cslave_idx = (slave_idx + i) % num_of_slaves;
                slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
        }

        /* Increment current slave index so the next call to tx burst starts
         * on the next slave */
        slave_idx = ++cslave_idx;

        /* Send packet burst on each slave device */
        for (i = 0; i < num_of_slaves; i++) {
                if (slave_nb_pkts[i] > 0) {
                        num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                        slave_bufs[i], slave_nb_pkts[i]);

                        /* if tx burst fails move packets to end of bufs */
                        if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
                                int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

                                tx_fail_total += tx_fail_slave;

                                memcpy(&bufs[nb_pkts - tx_fail_total],
                                       &slave_bufs[i][num_tx_slave],
                                       tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
        }

        return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
                struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_dev_private *internals;
        struct bond_tx_queue *bd_tx_q;

        bd_tx_q = (struct bond_tx_queue *)queue;
        internals = bd_tx_q->dev_private;

        if (internals->active_slave_count < 1)
                return 0;

        return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
                        bufs, nb_pkts);
}

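/*
 * Header-hash helpers for the balance and 802.3ad transmit policies.
 * ether_hash() folds the source and destination MACs, ipv4_hash() and
 * ipv6_hash() fold the L3 addresses; the burst_xmit_l2/l23/l34 functions
 * below combine these (plus TCP/UDP ports for l34) and reduce the result
 * modulo the slave count to pick an output slave per packet.
 */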
static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
        unaligned_uint16_t *word_src_addr =
                (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
        unaligned_uint16_t *word_dst_addr =
                (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
        return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
        unaligned_uint32_t *word_src_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
        unaligned_uint32_t *word_dst_addr =
                (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

        return (word_src_addr[0] ^ word_dst_addr[0]) ^
                        (word_src_addr[1] ^ word_dst_addr[1]) ^
                        (word_src_addr[2] ^ word_dst_addr[2]) ^
                        (word_src_addr[3] ^ word_dst_addr[3]);
}

void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint32_t hash;
        int i;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

                hash = ether_hash(eth_hdr);

                slaves[i] = (hash ^= hash >> 8) % slave_count;
        }
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        uint16_t i;
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        uint32_t hash, l3hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                l3hash = 0;

                proto = eth_hdr->ether_type;
                hash = ether_hash(eth_hdr);

                vlan_offset = get_vlan_offset(eth_hdr, &proto);

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv4_hash(ipv4_hdr);

                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);
                }

                hash = hash ^ l3hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
                uint8_t slave_count, uint16_t *slaves)
{
        struct ether_hdr *eth_hdr;
        uint16_t proto;
        size_t vlan_offset;
        int i;

        struct udp_hdr *udp_hdr;
        struct tcp_hdr *tcp_hdr;
        uint32_t hash, l3hash, l4hash;

        for (i = 0; i < nb_pkts; i++) {
                eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
                proto = eth_hdr->ether_type;
                vlan_offset = get_vlan_offset(eth_hdr, &proto);
                l3hash = 0;
                l4hash = 0;

                if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
                        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        size_t ip_hdr_offset;

                        l3hash = ipv4_hash(ipv4_hdr);

                        /* there is no L4 header in a fragmented packet */
                        if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
                                                                == 0)) {
                                ip_hdr_offset = (ipv4_hdr->version_ihl
                                        & IPV4_HDR_IHL_MASK) *
                                        IPV4_IHL_MULTIPLIER;

                                if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
                                        tcp_hdr = (struct tcp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(tcp_hdr);
                                } else if (ipv4_hdr->next_proto_id ==
                                                                IPPROTO_UDP) {
                                        udp_hdr = (struct udp_hdr *)
                                                ((char *)ipv4_hdr +
                                                        ip_hdr_offset);
                                        l4hash = HASH_L4_PORTS(udp_hdr);
                                }
                        }
                } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
                        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
                                        ((char *)(eth_hdr + 1) + vlan_offset);
                        l3hash = ipv6_hash(ipv6_hdr);

                        if (ipv6_hdr->proto == IPPROTO_TCP) {
                                tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(tcp_hdr);
                        } else if (ipv6_hdr->proto == IPPROTO_UDP) {
                                udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
                                l4hash = HASH_L4_PORTS(udp_hdr);
                        }
                }

                hash = l3hash ^ l4hash;
                hash ^= hash >> 16;
                hash ^= hash >> 8;

                slaves[i] = hash % slave_count;
        }
}

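/*
 * TLB (mode 5) bookkeeping: each slave's remaining link bandwidth is
 * tracked as an integer part plus remainder so that slaves can be ordered
 * from most to least spare capacity before each reorder period.
 */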
struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
        int i;

        for (i = 0; i < internals->active_slave_count; i++) {
                tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
}

static int
bandwidth_cmp(const void *a, const void *b)
{
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;
        int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
        int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
                        (int64_t)bwg_a->bwg_left_remainder;
        if (diff > 0)
                return 1;
        else if (diff < 0)
                return -1;
        else if (diff2 > 0)
                return 1;
        else if (diff2 < 0)
                return -1;
        else
                return 0;
}

static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
                struct bwg_slave *bwg_slave)
{
        struct rte_eth_link link_status;

        rte_eth_link_get_nowait(port_id, &link_status);
        uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
        if (link_bwg == 0)
                return;
        link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
        bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
        bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

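/*
 * Alarm callback run every REORDER_PERIOD_MS: sample each active slave's
 * Tx byte counter, compute its remaining bandwidth since the last stats
 * refresh, and re-sort internals->tlb_slaves_order so the TLB transmit
 * path tries the least-loaded slave first. The callback re-arms itself.
 */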
static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
        struct bond_dev_private *internals = arg;
        struct rte_eth_stats slave_stats;
        struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
        uint64_t tx_bytes;

        uint8_t update_stats = 0;
        uint16_t i, slave_id;

        internals->slave_update_idx++;

        if (internals->slave_update_idx >= REORDER_PERIOD_MS)
                update_stats = 1;

        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
                rte_eth_stats_get(slave_id, &slave_stats);
                tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
                bandwidth_left(slave_id, tx_bytes,
                                internals->slave_update_idx, &bwg_array[i]);
                bwg_array[i].slave = slave_id;

                if (update_stats) {
                        tlb_last_obytets[slave_id] = slave_stats.obytes;
                }
        }

        if (update_stats == 1)
                internals->slave_update_idx = 0;

        slave_count = i;
        qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
        for (i = 0; i < slave_count; i++)
                internals->tlb_slaves_order[i] = bwg_array[i].slave;

        rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
                        internals);
}

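/*
 * Mode 5 (TLB) Tx: walk the slaves in tlb_slaves_order (least loaded
 * first), rewriting the Ethernet source MAC of outgoing packets from the
 * primary slave's address to the transmitting slave's address, and hand
 * each slave as much of the remaining burst as it will accept.
 */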
static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct rte_eth_dev *primary_port =
                        &rte_eth_devices[internals->primary_port];
        uint16_t num_tx_total = 0;
        uint16_t i, j;

        uint16_t num_of_slaves = internals->active_slave_count;
        uint16_t slaves[RTE_MAX_ETHPORTS];

        struct ether_hdr *ether_hdr;
        struct ether_addr primary_slave_addr;
        struct ether_addr active_slave_addr;

        if (num_of_slaves < 1)
                return num_tx_total;

        memcpy(slaves, internals->tlb_slaves_order,
                                sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);

        ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

        if (nb_pkts > 3) {
                for (i = 0; i < 3; i++)
                        rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
        }

        for (i = 0; i < num_of_slaves; i++) {
                rte_eth_macaddr_get(slaves[i], &active_slave_addr);
                for (j = num_tx_total; j < nb_pkts; j++) {
                        if (j + 3 < nb_pkts)
                                rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

                        ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
                        if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
                                ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
                }

                num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
                                bufs + num_tx_total, nb_pkts - num_tx_total);

                if (num_tx_total == nb_pkts)
                        break;
        }

        return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
        rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
        bond_ethdev_update_tlb_slave_cb(internals);
}

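/*
 * Mode 6 (ALB) Tx: ARP packets are assigned to specific slaves by the ALB
 * logic (bond_mode_alb_arp_xmit()) with their source MAC rewritten to the
 * chosen slave's address; all other traffic is sent with the TLB policy.
 * When the client table is flagged for update (mode6.ntt), additional ARP
 * update packets are generated and transmitted on the proper slaves.
 */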
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        struct ether_hdr *eth_h;
        uint16_t ether_type, offset;

        struct client_data *client_info;

        /*
         * We create transmit buffers for every slave and one additional to
         * send through tlb. In the worst case every packet will be sent on
         * one port.
         */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
        uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

        /*
         * We create separate transmit buffers for update packets as they
         * won't be counted in num_tx_total.
         */
        struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
        uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

        struct rte_mbuf *upd_pkt;
        size_t pkt_size;

        uint16_t num_send, num_not_send = 0;
        uint16_t num_tx_total = 0;
        uint16_t slave_idx;

        int i, j;

        /* Search tx buffer for ARP packets and forward them to alb */
        for (i = 0; i < nb_pkts; i++) {
                eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
                ether_type = eth_h->ether_type;
                offset = get_vlan_offset(eth_h, &ether_type);

                if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
                        slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

                        /* Change src mac in eth header */
                        rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

                        /* Add packet to slave tx buffer */
                        slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
                        slave_bufs_pkts[slave_idx]++;
                } else {
                        /* If packet is not ARP, send it with TLB policy */
                        slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
                                        bufs[i];
                        slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
                }
        }

        /* Update connected client ARP tables */
        if (internals->mode6.ntt) {
                for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
                        client_info = &internals->mode6.client_table[i];

                        if (client_info->in_use) {
                                /* Allocate new packet to send ARP update on current slave */
                                upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
                                if (upd_pkt == NULL) {
                                        RTE_BOND_LOG(ERR,
                                                     "Failed to allocate ARP packet from pool");
                                        continue;
                                }
                                pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
                                                + client_info->vlan_count * sizeof(struct vlan_hdr);
                                upd_pkt->data_len = pkt_size;
                                upd_pkt->pkt_len = pkt_size;

                                slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
                                                internals);

                                /* Add packet to update tx buffer */
                                update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
                                update_bufs_pkts[slave_idx]++;
                        }
                }
                internals->mode6.ntt = 0;
        }

        /* Send ARP packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (slave_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
                                        slave_bufs[i], slave_bufs_pkts[i]);
                        /* Move any unsent packets back to the tail of bufs */
                        for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
                                bufs[nb_pkts - 1 - num_not_send - j] =
                                                slave_bufs[i][slave_bufs_pkts[i] - 1 - j];
                        }

                        num_tx_total += num_send;
                        num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        /* Print TX stats including update packets */
                        for (j = 0; j < slave_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send update packets on proper slaves */
        for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
                if (update_bufs_pkts[i] > 0) {
                        num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
                                        update_bufs_pkts[i]);
                        for (j = num_send; j < update_bufs_pkts[i]; j++) {
                                rte_pktmbuf_free(update_bufs[i][j]);
                        }
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
                        for (j = 0; j < update_bufs_pkts[i]; j++) {
                                eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
                                mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
                        }
#endif
                }
        }

        /* Send non-ARP packets using tlb policy */
        if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
                num_send = bond_ethdev_tx_burst_tlb(queue,
                                slave_bufs[RTE_MAX_ETHPORTS],
                                slave_bufs_pkts[RTE_MAX_ETHPORTS]);

                /* Move any unsent TLB packets back to the tail of bufs */
                for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; j++) {
                        bufs[nb_pkts - 1 - num_not_send - j] =
                                        slave_bufs[RTE_MAX_ETHPORTS]
                                                  [slave_bufs_pkts[RTE_MAX_ETHPORTS] - 1 - j];
                }

                num_tx_total += num_send;
        }

        return num_tx_total;
}

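/*
 * Mode 2 (balance) Tx: hash each packet onto an active slave according to
 * the configured transmit policy (l2/l23/l34) and burst the per-slave
 * groups. Unsent packets are compacted to the tail of bufs so the caller
 * sees an accurate count and can retry the remainder.
 */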
static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        /* Array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        /*
         * Populate the per-slave mbuf arrays with the packets to be sent on
         * each slave, selecting the output slave using a hash based on the
         * xmit policy
         */
        internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
                        bufs_slave_port_idxs);

        for (i = 0; i < nb_bufs; i++) {
                /* Populate slave mbuf arrays with mbufs for that slave. */
                uint8_t slave_idx = bufs_slave_port_idxs[i];

                slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
        }

        /* Send packet burst on each slave device */
        for (i = 0; i < slave_count; i++) {
                if (slave_nb_bufs[i] == 0)
                        continue;

                slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                bd_tx_q->queue_id, slave_bufs[i],
                                slave_nb_bufs[i]);

                total_tx_count += slave_tx_count;

                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                        int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
                        total_tx_fail_count += slave_tx_fail_count;
                        memcpy(&bufs[nb_bufs - total_tx_fail_count],
                               &slave_bufs[i][slave_tx_count],
                               slave_tx_fail_count * sizeof(bufs[0]));
                }
        }

        return total_tx_count;
}

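/*
 * Mode 4 (802.3ad) Tx without dedicated queues: data packets are hashed
 * across the slaves currently in the DISTRIBUTING state, and after the
 * data burst any pending LACP control frames queued on each slave's
 * tx_ring are transmitted (and re-queued if the slave rejects them).
 */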
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                uint16_t nb_bufs)
{
        struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
        struct bond_dev_private *internals = bd_tx_q->dev_private;

        uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t slave_count;

        uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
        uint16_t dist_slave_count;

        /* 2-D array to sort mbufs for transmission on each slave into */
        struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
        /* Number of mbufs for transmission on each slave */
        uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
        /* Mapping array generated by hash function to map mbufs to slaves;
         * indexed per packet, so it must be sized by the burst length */
        uint16_t bufs_slave_port_idxs[nb_bufs];

        uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;

        uint16_t i;

        if (unlikely(nb_bufs == 0))
                return 0;

        /* Copy slave list to protect against slave up/down changes during tx
         * bursting */
        slave_count = internals->active_slave_count;
        if (unlikely(slave_count < 1))
                return 0;

        memcpy(slave_port_ids, internals->active_slaves,
                        sizeof(slave_port_ids[0]) * slave_count);

        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
                                        slave_port_ids[i];
        }

        if (likely(dist_slave_count > 0)) {

                /*
                 * Populate the per-slave mbuf arrays with the packets to be
                 * sent on each slave, selecting the output slave using a
                 * hash based on the xmit policy
                 */
                internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
                                bufs_slave_port_idxs);

                for (i = 0; i < nb_bufs; i++) {
                        /*
                         * Populate slave mbuf arrays with mbufs for that
                         * slave
                         */
                        uint8_t slave_idx = bufs_slave_port_idxs[i];

                        slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
                                        bufs[i];
                }

                /* Send packet burst on each slave device */
                for (i = 0; i < dist_slave_count; i++) {
                        if (slave_nb_bufs[i] == 0)
                                continue;

                        slave_tx_count = rte_eth_tx_burst(
                                        dist_slave_port_ids[i],
                                        bd_tx_q->queue_id, slave_bufs[i],
                                        slave_nb_bufs[i]);

                        total_tx_count += slave_tx_count;

                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
                                int slave_tx_fail_count = slave_nb_bufs[i] -
                                                slave_tx_count;
                                total_tx_fail_count += slave_tx_fail_count;

                                memcpy(&bufs[nb_bufs - total_tx_fail_count],
                                       &slave_bufs[i][slave_tx_count],
                                       slave_tx_fail_count * sizeof(bufs[0]));
                        }
                }
        }

        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
                struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;

                if (likely(rte_ring_empty(port->tx_ring)))
                        continue;

                if (rte_ring_dequeue(port->tx_ring,
                                     (void **)&ctrl_pkt) != -ENOENT) {
                        slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
                                        bd_tx_q->queue_id, &ctrl_pkt, 1);
                        /*
                         * re-enqueue LAG control plane packets to buffering
                         * ring if transmission fails so the packet isn't lost.
                         */
                        if (slave_tx_count != 1)
                                rte_ring_enqueue(port->tx_ring, ctrl_pkt);
                }
        }

        return total_tx_count;
}

1392 static uint16_t
1393 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1394                 uint16_t nb_pkts)
1395 {
1396         struct bond_dev_private *internals;
1397         struct bond_tx_queue *bd_tx_q;
1398
1399         uint8_t tx_failed_flag = 0;
1400         uint16_t num_of_slaves, slaves[RTE_MAX_ETHPORTS];
1401
1402         uint16_t max_nb_of_tx_pkts = 0;
1403
1404         int slave_tx_total[RTE_MAX_ETHPORTS];
1405         int i, most_successful_tx_slave = -1;
1406
1407         bd_tx_q = (struct bond_tx_queue *)queue;
1408         internals = bd_tx_q->dev_private;
1409
1410         /* Copy slave list to protect against slave up/down changes during tx
1411          * bursting */
1412         num_of_slaves = internals->active_slave_count;
1413         memcpy(slaves, internals->active_slaves,
1414                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1415
1416         if (num_of_slaves < 1)
1417                 return 0;
1418
1419         /* Increment reference count on mbufs */
1420         for (i = 0; i < nb_pkts; i++)
1421                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1422
1423         /* Transmit burst on each active slave */
1424         for (i = 0; i < num_of_slaves; i++) {
1425                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1426                                         bufs, nb_pkts);
1427
1428                 if (unlikely(slave_tx_total[i] < nb_pkts))
1429                         tx_failed_flag = 1;
1430
1431                 /* record the value and slave index for the slave which transmits the
1432                  * maximum number of packets */
1433                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1434                         max_nb_of_tx_pkts = slave_tx_total[i];
1435                         most_successful_tx_slave = i;
1436                 }
1437         }
1438
1439         /* if slaves fail to transmit packets from burst, the calling application
1440          * is not expected to know about multiple references to packets so we must
1441          * handle failures of all packets except those of the most successful slave
1442          */
1443         if (unlikely(tx_failed_flag))
1444                 for (i = 0; i < num_of_slaves; i++)
1445                         if (i != most_successful_tx_slave)
1446                                 while (slave_tx_total[i] < nb_pkts)
1447                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1448
1449         return max_nb_of_tx_pkts;
1450 }
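
/*
 * Illustrative sketch, not compiled into the driver: the mbuf reference
 * counting that bond_ethdev_tx_burst_broadcast() relies on, shown for a
 * hypothetical two-slave bond and mempool "mp".
 */
#if 0
static void
bond_broadcast_refcnt_example(struct rte_mempool *mp)
{
        struct rte_mbuf *m = rte_pktmbuf_alloc(mp);     /* refcnt == 1 */

        /* One extra reference per additional slave (num_of_slaves - 1), so
         * that each slave's tx path can free the mbuf independently. */
        rte_mbuf_refcnt_update(m, 2 - 1);               /* refcnt == 2 */

        /* Each successful slave transmission eventually releases one
         * reference; the failure loop above releases the surplus for all
         * slaves except the most successful one. */
}
#endif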
1451
1452 void
1453 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1454 {
1455         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1456
1457         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1458                 /**
1459                  * If in mode 4 then save the link properties of the first
1460                  * slave, all subsequent slaves must match these properties
1461                  */
1462                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1463
1464                 bond_link->link_autoneg = slave_link->link_autoneg;
1465                 bond_link->link_duplex = slave_link->link_duplex;
1466                 bond_link->link_speed = slave_link->link_speed;
1467         } else {
1468                 /**
1469                  * In any other mode the link properties are set to default
1470                  * values of AUTONEG/DUPLEX
1471                  */
1472                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1473                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1474         }
1475 }
1476
1477 int
1478 link_properties_valid(struct rte_eth_dev *ethdev,
1479                 struct rte_eth_link *slave_link)
1480 {
1481         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1482
1483         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1484                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1485
1486                 if (bond_link->link_duplex != slave_link->link_duplex ||
1487                         bond_link->link_autoneg != slave_link->link_autoneg ||
1488                         bond_link->link_speed != slave_link->link_speed)
1489                         return -1;
1490         }
1491
1492         return 0;
1493 }
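
/*
 * Illustrative sketch, not compiled: vetting a candidate slave's link with
 * link_properties_valid() before activating it in mode 4; the arguments
 * are hypothetical.
 */
#if 0
static void
bond_link_properties_example(struct rte_eth_dev *bonded_eth_dev,
                uint16_t slave_port_id)
{
        struct rte_eth_link candidate_link;

        rte_eth_link_get_nowait(slave_port_id, &candidate_link);
        if (link_properties_valid(bonded_eth_dev, &candidate_link) != 0)
                RTE_BOND_LOG(ERR,
                        "slave %u speed/duplex/autoneg does not match bond",
                        slave_port_id);
}
#endif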
1494
1495 int
1496 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1497 {
1498         struct ether_addr *mac_addr;
1499
1500         if (eth_dev == NULL) {
1501                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1502                 return -1;
1503         }
1504
1505         if (dst_mac_addr == NULL) {
1506                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1507                 return -1;
1508         }
1509
1510         mac_addr = eth_dev->data->mac_addrs;
1511
1512         ether_addr_copy(mac_addr, dst_mac_addr);
1513         return 0;
1514 }
1515
1516 int
1517 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1518 {
1519         struct ether_addr *mac_addr;
1520
1521         if (eth_dev == NULL) {
1522                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1523                 return -1;
1524         }
1525
1526         if (new_mac_addr == NULL) {
1527                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1528                 return -1;
1529         }
1530
1531         mac_addr = eth_dev->data->mac_addrs;
1532
1533         /* If the new MAC is different from the current MAC then update */
1534         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1535                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1536
1537         return 0;
1538 }
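
/*
 * Illustrative sketch, not compiled: applications normally set the bonded
 * MAC through the public API below rather than calling mac_address_set()
 * directly; "bond_port" and the address are hypothetical.
 */
#if 0
static void
bond_mac_set_example(uint16_t bond_port)
{
        struct ether_addr addr = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 } };

        /* Marks the address as user defined, so it is pushed to the slaves
         * according to the bonding mode and survives slave changes. */
        if (rte_eth_bond_mac_address_set(bond_port, &addr) != 0)
                RTE_BOND_LOG(ERR, "failed to set bonded MAC address");
}
#endif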
1539
1540 static const struct ether_addr null_mac_addr;
1541
1542 /*
1543  * Add additional MAC addresses to the slave
1544  */
1545 int
1546 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1547                 uint16_t slave_port_id)
1548 {
1549         int i, ret;
1550         struct ether_addr *mac_addr;
1551
1552         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1553                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1554                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1555                         break;
1556
1557                 ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1558                 if (ret < 0) {
1559                         /* rollback */
1560                         for (i--; i > 0; i--)
1561                                 rte_eth_dev_mac_addr_remove(slave_port_id,
1562                                         &bonded_eth_dev->data->mac_addrs[i]);
1563                         return ret;
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
1570 /*
1571  * Remove additional MAC addresses from the slave
1572  */
1573 int
1574 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1575                 uint16_t slave_port_id)
1576 {
1577         int i, rc, ret;
1578         struct ether_addr *mac_addr;
1579
1580         rc = 0;
1581         for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1582                 mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1583                 if (is_same_ether_addr(mac_addr, &null_mac_addr))
1584                         break;
1585
1586                 ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1587                 /* save only the first error */
1588                 if (ret < 0 && rc == 0)
1589                         rc = ret;
1590         }
1591
1592         return rc;
1593 }
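
/*
 * Illustrative sketch, not compiled: a secondary MAC added to the bonded
 * port lands in data->mac_addrs[1..] and is replayed on each slave by
 * slave_add_mac_addresses() above; "bond_port" is hypothetical.
 */
#if 0
static void
bond_secondary_mac_example(uint16_t bond_port)
{
        struct ether_addr extra = {
                .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 } };

        if (rte_eth_dev_mac_addr_add(bond_port, &extra, 0) != 0)
                RTE_BOND_LOG(ERR, "failed to add secondary MAC address");
}
#endif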
1594
1595 int
1596 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1597 {
1598         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1599         int i;
1600
1601         /* Update slave devices MAC addresses */
1602         if (internals->slave_count < 1)
1603                 return -1;
1604
1605         switch (internals->mode) {
1606         case BONDING_MODE_ROUND_ROBIN:
1607         case BONDING_MODE_BALANCE:
1608         case BONDING_MODE_BROADCAST:
1609                 for (i = 0; i < internals->slave_count; i++) {
1610                         if (rte_eth_dev_default_mac_addr_set(
1611                                         internals->slaves[i].port_id,
1612                                         bonded_eth_dev->data->mac_addrs)) {
1613                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1614                                                 internals->slaves[i].port_id);
1615                                 return -1;
1616                         }
1617                 }
1618                 break;
1619         case BONDING_MODE_8023AD:
1620                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1621                 break;
1622         case BONDING_MODE_ACTIVE_BACKUP:
1623         case BONDING_MODE_TLB:
1624         case BONDING_MODE_ALB:
1625         default:
1626                 for (i = 0; i < internals->slave_count; i++) {
1627                         if (internals->slaves[i].port_id ==
1628                                         internals->current_primary_port) {
1629                                 if (rte_eth_dev_default_mac_addr_set(
1630                                                 internals->current_primary_port,
1631                                                 bonded_eth_dev->data->mac_addrs)) {
1632                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1633                                                         internals->current_primary_port);
1634                                         return -1;
1635                                 }
1636                         } else {
1637                                 if (rte_eth_dev_default_mac_addr_set(
1638                                                 internals->slaves[i].port_id,
1639                                                 &internals->slaves[i].persisted_mac_addr)) {
1640                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641                                                         internals->slaves[i].port_id);
1642                                         return -1;
1643                                 }
1644                         }
1645                 }
1646         }
1647
1648         return 0;
1649 }
1650
1651 int
1652 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1653 {
1654         struct bond_dev_private *internals;
1655
1656         internals = eth_dev->data->dev_private;
1657
1658         switch (mode) {
1659         case BONDING_MODE_ROUND_ROBIN:
1660                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1661                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1662                 break;
1663         case BONDING_MODE_ACTIVE_BACKUP:
1664                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1665                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1666                 break;
1667         case BONDING_MODE_BALANCE:
1668                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1669                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670                 break;
1671         case BONDING_MODE_BROADCAST:
1672                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1673                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1674                 break;
1675         case BONDING_MODE_8023AD:
1676                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1677                         return -1;
1678
1679                 if (internals->mode4.dedicated_queues.enabled == 0) {
1680                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1681                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1682                         RTE_BOND_LOG(WARNING,
1683                                 "Using mode 4, it is necessary to do TX burst "
1684                                 "and RX burst at least every 100ms.");
1685                 } else {
1686                         /* Use flow director's optimization */
1687                         eth_dev->rx_pkt_burst =
1688                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1689                         eth_dev->tx_pkt_burst =
1690                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1691                 }
1692                 break;
1693         case BONDING_MODE_TLB:
1694                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1695                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1696                 break;
1697         case BONDING_MODE_ALB:
1698                 if (bond_mode_alb_enable(eth_dev) != 0)
1699                         return -1;
1700
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1703                 break;
1704         default:
1705                 return -1;
1706         }
1707
1708         internals->mode = mode;
1709
1710         return 0;
1711 }
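
/*
 * Illustrative sketch, not compiled: creating a mode 4 bond and honouring
 * the 100 ms servicing requirement logged above. The slave port ids, queue
 * id and burst size are hypothetical, and the usual configure/queue-setup/
 * start sequence is omitted for brevity.
 */
#if 0
static void
bond_mode4_service_example(void)
{
        struct rte_mbuf *pkts[32];
        int bond_port = rte_eth_bond_create("net_bonding0",
                        BONDING_MODE_8023AD, rte_socket_id());

        rte_eth_bond_slave_add(bond_port, 0);
        rte_eth_bond_slave_add(bond_port, 1);

        /* Without dedicated queues LACPDUs ride the data path, so rx/tx
         * burst must be called at least every 100 ms, even when idle. */
        for (;;) {
                uint16_t n = rte_eth_rx_burst(bond_port, 0, pkts, 32);
                rte_eth_tx_burst(bond_port, 0, pkts, n);
        }
}
#endif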
1712
1713
1714 static int
1715 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1716                 struct rte_eth_dev *slave_eth_dev)
1717 {
1718         int errval = 0;
1719         struct bond_dev_private *internals = (struct bond_dev_private *)
1720                 bonded_eth_dev->data->dev_private;
1721         struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1722
1723         if (port->slow_pool == NULL) {
1724                 char mem_name[256];
1725                 int slave_id = slave_eth_dev->data->port_id;
1726
1727                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1728                                 slave_id);
1729                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1730                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1731                         slave_eth_dev->data->numa_node);
1732
1733                 /* Any memory allocation failure in initialization is critical because
1734                  * resources can't be freed, so reinitialization is impossible. */
1735                 if (port->slow_pool == NULL) {
1736                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1737                                 slave_id, mem_name, rte_strerror(rte_errno));
1738                 }
1739         }
1740
1741         if (internals->mode4.dedicated_queues.enabled == 1) {
1742                 /* Configure slow Rx queue */
1743
1744                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1745                                 internals->mode4.dedicated_queues.rx_qid, 128,
1746                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1747                                 NULL, port->slow_pool);
1748                 if (errval != 0) {
1749                         RTE_BOND_LOG(ERR,
1750                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1751                                         slave_eth_dev->data->port_id,
1752                                         internals->mode4.dedicated_queues.rx_qid,
1753                                         errval);
1754                         return errval;
1755                 }
1756
1757                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1758                                 internals->mode4.dedicated_queues.tx_qid, 512,
1759                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1760                                 NULL);
1761                 if (errval != 0) {
1762                         RTE_BOND_LOG(ERR,
1763                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1764                                 slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid,
1766                                 errval);
1767                         return errval;
1768                 }
1769         }
1770         return 0;
1771 }
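
/*
 * Illustrative sketch, not compiled: enabling the dedicated control queues
 * that make slave_configure_slow_queue() set up the extra rx/tx queue pair.
 * Must be called while the bonded port is stopped; "bond_port" is
 * hypothetical.
 */
#if 0
static void
bond_dedicated_queues_example(uint16_t bond_port)
{
        if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) != 0)
                RTE_BOND_LOG(ERR, "could not enable dedicated queues");
        /* bond_ethdev_mode_set() then installs the fast-queue rx/tx burst
         * handlers and LACPDUs no longer share the data queues */
}
#endif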
1772
1773 int
1774 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1775                 struct rte_eth_dev *slave_eth_dev)
1776 {
1777         struct bond_rx_queue *bd_rx_q;
1778         struct bond_tx_queue *bd_tx_q;
1779         uint16_t nb_rx_queues;
1780         uint16_t nb_tx_queues;
1781
1782         int errval;
1783         uint16_t q_id;
1784         struct rte_flow_error flow_error;
1785
1786         struct bond_dev_private *internals = (struct bond_dev_private *)
1787                 bonded_eth_dev->data->dev_private;
1788
1789         /* Stop slave */
1790         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1791
1792         /* Enable interrupts on slave device if supported */
1793         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1794                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1795
1796         /* If RSS is enabled for bonding, try to enable it for slaves  */
1797         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1798                 if (internals->rss_key_len != 0) {
1799                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1800                                         internals->rss_key_len;
1801                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1802                                         internals->rss_key;
1803                 } else {
1804                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1805                 }
1806
1807                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1808                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1809                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1810                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1811         }
1812
1813         if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1814                         DEV_RX_OFFLOAD_VLAN_FILTER)
1815                 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1816                                 DEV_RX_OFFLOAD_VLAN_FILTER;
1817         else
1818                 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1819                                 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1820
1821         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1822         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1823
1824         if (internals->mode == BONDING_MODE_8023AD) {
1825                 if (internals->mode4.dedicated_queues.enabled == 1) {
1826                         nb_rx_queues++;
1827                         nb_tx_queues++;
1828                 }
1829         }
1830
1831         errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1832                                      bonded_eth_dev->data->mtu);
1833         if (errval != 0 && errval != -ENOTSUP) {
1834                 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1835                                 slave_eth_dev->data->port_id, errval);
1836                 return errval;
1837         }
1838
1839         /* Configure device */
1840         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1841                         nb_rx_queues, nb_tx_queues,
1842                         &(slave_eth_dev->data->dev_conf));
1843         if (errval != 0) {
1844                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1845                                 slave_eth_dev->data->port_id, errval);
1846                 return errval;
1847         }
1848
1849         /* Setup Rx Queues */
1850         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1851                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1852
1853                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1854                                 bd_rx_q->nb_rx_desc,
1855                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1856                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1857                 if (errval != 0) {
1858                         RTE_BOND_LOG(ERR,
1859                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1860                                         slave_eth_dev->data->port_id, q_id, errval);
1861                         return errval;
1862                 }
1863         }
1864
1865         /* Setup Tx Queues */
1866         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1867                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1868
1869                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1870                                 bd_tx_q->nb_tx_desc,
1871                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1872                                 &bd_tx_q->tx_conf);
1873                 if (errval != 0) {
1874                         RTE_BOND_LOG(ERR,
1875                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1876                                 slave_eth_dev->data->port_id, q_id, errval);
1877                         return errval;
1878                 }
1879         }
1880
1881         if (internals->mode == BONDING_MODE_8023AD &&
1882                         internals->mode4.dedicated_queues.enabled == 1) {
1883                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1884                 if (errval != 0)
1885                         return errval;
1886
1887                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1888                                 slave_eth_dev->data->port_id) != 0) {
1889                         RTE_BOND_LOG(ERR,
1890                                 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1891                                 slave_eth_dev->data->port_id);
1892                         return -1;
1893                 }
1894
1895                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1896                         rte_flow_destroy(slave_eth_dev->data->port_id,
1897                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1898                                         &flow_error);
1899
1900                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1901                                 slave_eth_dev->data->port_id);
1902         }
1903
1904         /* Start device */
1905         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1906         if (errval != 0) {
1907                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1908                                 slave_eth_dev->data->port_id, errval);
1909                 return -1;
1910         }
1911
1912         /* If RSS is enabled for bonding, synchronize RETA */
1913         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1914                 int i;
1915                 struct bond_dev_private *internals;
1916
1917                 internals = bonded_eth_dev->data->dev_private;
1918
1919                 for (i = 0; i < internals->slave_count; i++) {
1920                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1921                                 errval = rte_eth_dev_rss_reta_update(
1922                                                 slave_eth_dev->data->port_id,
1923                                                 &internals->reta_conf[0],
1924                                                 internals->slaves[i].reta_size);
1925                                 if (errval != 0) {
1926                                         RTE_BOND_LOG(WARNING,
1927                                                      "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1928                                                      " RSS Configuration for bonding may be inconsistent.",
1929                                                      slave_eth_dev->data->port_id, errval);
1930                                 }
1931                                 break;
1932                         }
1933                 }
1934         }
1935
1936         /* If lsc interrupt is set, check initial slave's link status */
1937         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1938                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1939                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1940                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1941                         NULL);
1942         }
1943
1944         return 0;
1945 }
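
/*
 * Illustrative sketch, not compiled: the application-side setup that
 * slave_configure() mirrors onto each slave when the bond starts; the
 * descriptor counts and mempool "mp" are hypothetical.
 */
#if 0
static int
bond_setup_example(uint16_t bond_port, struct rte_mempool *mp)
{
        struct rte_eth_conf conf = { .rxmode = { .mq_mode = ETH_MQ_RX_NONE } };

        if (rte_eth_dev_configure(bond_port, 1, 1, &conf) != 0)
                return -1;
        if (rte_eth_rx_queue_setup(bond_port, 0, 128, rte_socket_id(),
                        NULL, mp) != 0)
                return -1;
        if (rte_eth_tx_queue_setup(bond_port, 0, 512, rte_socket_id(),
                        NULL) != 0)
                return -1;
        /* starting the bond reconfigures and starts every slave through
         * slave_configure() above */
        return rte_eth_dev_start(bond_port);
}
#endif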
1946
1947 void
1948 slave_remove(struct bond_dev_private *internals,
1949                 struct rte_eth_dev *slave_eth_dev)
1950 {
1951         uint16_t i;
1952
1953         for (i = 0; i < internals->slave_count; i++)
1954                 if (internals->slaves[i].port_id ==
1955                                 slave_eth_dev->data->port_id)
1956                         break;
1957
1958         if (i < (internals->slave_count - 1)) {
1959                 struct rte_flow *flow;
1960
1961                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1962                                 sizeof(internals->slaves[0]) *
1963                                 (internals->slave_count - i - 1));
1964                 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1965                         memmove(&flow->flows[i], &flow->flows[i + 1],
1966                                 sizeof(flow->flows[0]) *
1967                                 (internals->slave_count - i - 1));
1968                         flow->flows[internals->slave_count - 1] = NULL;
1969                 }
1970         }
1971
1972         internals->slave_count--;
1973
1974         /* force reconfiguration of slave interfaces */
1975         _rte_eth_dev_reset(slave_eth_dev);
1976 }
1977
1978 static void
1979 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1980
1981 void
1982 slave_add(struct bond_dev_private *internals,
1983                 struct rte_eth_dev *slave_eth_dev)
1984 {
1985         struct bond_slave_details *slave_details =
1986                         &internals->slaves[internals->slave_count];
1987
1988         slave_details->port_id = slave_eth_dev->data->port_id;
1989         slave_details->last_link_status = 0;
1990
1991         /* Mark slave devices that don't support interrupts so we can
1992          * compensate when we start the bond
1993          */
1994         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1995                 slave_details->link_status_poll_enabled = 1;
1996         }
1997
1998         slave_details->link_status_wait_to_complete = 0;
1999         /* Save the slave's current MAC so it can be restored on removal */
2000         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2001                         sizeof(struct ether_addr));
2002 }
2003
2004 void
2005 bond_ethdev_primary_set(struct bond_dev_private *internals,
2006                 uint16_t slave_port_id)
2007 {
2008         int i;
2009
2010         if (internals->active_slave_count < 1)
2011                 internals->current_primary_port = slave_port_id;
2012         else
2013                 /* Search bonded device slave ports for new proposed primary port */
2014                 for (i = 0; i < internals->active_slave_count; i++) {
2015                         if (internals->active_slaves[i] == slave_port_id)
2016                                 internals->current_primary_port = slave_port_id;
2017                 }
2018 }
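
/*
 * Illustrative sketch, not compiled: selecting the primary slave used by
 * active-backup, TLB and ALB modes; "bond_port" and slave port 1 are
 * hypothetical.
 */
#if 0
static void
bond_primary_example(uint16_t bond_port)
{
        /* Takes effect immediately if the slave is active, otherwise when
         * it next becomes active (see bond_ethdev_primary_set() above). */
        if (rte_eth_bond_primary_set(bond_port, 1) != 0)
                RTE_BOND_LOG(ERR, "failed to set primary slave");
}
#endif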
2019
2020 static void
2021 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2022
2023 static int
2024 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2025 {
2026         struct bond_dev_private *internals;
2027         int i;
2028
2029         /* slave eth dev will be started by bonded device */
2030         if (check_for_bonded_ethdev(eth_dev)) {
2031                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2032                                 eth_dev->data->port_id);
2033                 return -1;
2034         }
2035
2036         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2037         eth_dev->data->dev_started = 1;
2038
2039         internals = eth_dev->data->dev_private;
2040
2041         if (internals->slave_count == 0) {
2042                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2043                 goto out_err;
2044         }
2045
2046         if (internals->user_defined_mac == 0) {
2047                 struct ether_addr *new_mac_addr = NULL;
2048
2049                 for (i = 0; i < internals->slave_count; i++)
2050                         if (internals->slaves[i].port_id == internals->primary_port)
2051                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2052
2053                 if (new_mac_addr == NULL)
2054                         goto out_err;
2055
2056                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2057                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2058                                         eth_dev->data->port_id);
2059                         goto out_err;
2060                 }
2061         }
2062
2063         /* If bonded device is configured in promiscuous mode then re-apply config */
2064         if (internals->promiscuous_en)
2065                 bond_ethdev_promiscuous_enable(eth_dev);
2066
2067         if (internals->mode == BONDING_MODE_8023AD) {
2068                 if (internals->mode4.dedicated_queues.enabled == 1) {
2069                         internals->mode4.dedicated_queues.rx_qid =
2070                                         eth_dev->data->nb_rx_queues;
2071                         internals->mode4.dedicated_queues.tx_qid =
2072                                         eth_dev->data->nb_tx_queues;
2073                 }
2074         }
2075
2076
2077         /* Reconfigure each slave device if starting bonded device */
2078         for (i = 0; i < internals->slave_count; i++) {
2079                 struct rte_eth_dev *slave_ethdev =
2080                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2081                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2082                         RTE_BOND_LOG(ERR,
2083                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2084                                 eth_dev->data->port_id,
2085                                 internals->slaves[i].port_id);
2086                         goto out_err;
2087                 }
2088                 /* We will need to poll for link status if any slave doesn't
2089                  * support interrupts
2090                  */
2091                 if (internals->slaves[i].link_status_poll_enabled)
2092                         internals->link_status_polling_enabled = 1;
2093         }
2094
2095         /* start polling if needed */
2096         if (internals->link_status_polling_enabled) {
2097                 rte_eal_alarm_set(
2098                         internals->link_status_polling_interval_ms * 1000,
2099                         bond_ethdev_slave_link_status_change_monitor,
2100                         (void *)&rte_eth_devices[internals->port_id]);
2101         }
2102
2103         /* Update all slave devices MACs*/
2104         if (mac_address_slaves_update(eth_dev) != 0)
2105                 goto out_err;
2106
2107         if (internals->user_defined_primary_port)
2108                 bond_ethdev_primary_set(internals, internals->primary_port);
2109
2110         if (internals->mode == BONDING_MODE_8023AD)
2111                 bond_mode_8023ad_start(eth_dev);
2112
2113         if (internals->mode == BONDING_MODE_TLB ||
2114                         internals->mode == BONDING_MODE_ALB)
2115                 bond_tlb_enable(internals);
2116
2117         return 0;
2118
2119 out_err:
2120         eth_dev->data->dev_started = 0;
2121         return -1;
2122 }
2123
2124 static void
2125 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2126 {
2127         uint16_t i;
2128
2129         if (dev->data->rx_queues != NULL) {
2130                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2131                         rte_free(dev->data->rx_queues[i]);
2132                         dev->data->rx_queues[i] = NULL;
2133                 }
2134                 dev->data->nb_rx_queues = 0;
2135         }
2136
2137         if (dev->data->tx_queues != NULL) {
2138                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2139                         rte_free(dev->data->tx_queues[i]);
2140                         dev->data->tx_queues[i] = NULL;
2141                 }
2142                 dev->data->nb_tx_queues = 0;
2143         }
2144 }
2145
2146 void
2147 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2148 {
2149         struct bond_dev_private *internals = eth_dev->data->dev_private;
2150         uint16_t i;
2151
2152         if (internals->mode == BONDING_MODE_8023AD) {
2153                 struct port *port;
2154                 void *pkt = NULL;
2155
2156                 bond_mode_8023ad_stop(eth_dev);
2157
2158                 /* Discard all messages to/from mode 4 state machines */
2159                 for (i = 0; i < internals->active_slave_count; i++) {
2160                         port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2161
2162                         RTE_ASSERT(port->rx_ring != NULL);
2163                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2164                                 rte_pktmbuf_free(pkt);
2165
2166                         RTE_ASSERT(port->tx_ring != NULL);
2167                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2168                                 rte_pktmbuf_free(pkt);
2169                 }
2170         }
2171
2172         if (internals->mode == BONDING_MODE_TLB ||
2173                         internals->mode == BONDING_MODE_ALB) {
2174                 bond_tlb_disable(internals);
2175                 for (i = 0; i < internals->active_slave_count; i++)
2176                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2177         }
2178
2179         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2180         eth_dev->data->dev_started = 0;
2181
2182         internals->link_status_polling_enabled = 0;
2183         for (i = 0; i < internals->slave_count; i++) {
2184                 internals->slaves[i].last_link_status = 0;
2185                 rte_eth_dev_stop(internals->slaves[i].port_id);
2186                 deactivate_slave(eth_dev, internals->slaves[i].port_id);
2187         }
2188 }
2189
2190 void
2191 bond_ethdev_close(struct rte_eth_dev *dev)
2192 {
2193         struct bond_dev_private *internals = dev->data->dev_private;
2194         uint16_t bond_port_id = internals->port_id;
2195         int skipped = 0;
2196         struct rte_flow_error ferror;
2197
2198         RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2199         while (internals->slave_count != skipped) {
2200                 uint16_t port_id = internals->slaves[skipped].port_id;
2201
2202                 rte_eth_dev_stop(port_id);
2203
2204                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2205                         RTE_BOND_LOG(ERR,
2206                                      "Failed to remove port %d from bonded device %s",
2207                                      port_id, dev->device->name);
2208                         skipped++;
2209                 }
2210         }
2211         bond_flow_ops.flush(dev, &ferror);
2212         bond_ethdev_free_queues(dev);
2213         rte_bitmap_reset(internals->vlan_filter_bmp);
2214 }
2215
2216 /* forward declaration */
2217 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2218
2219 static void
2220 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2221 {
2222         struct bond_dev_private *internals = dev->data->dev_private;
2223
2224         uint16_t max_nb_rx_queues = UINT16_MAX;
2225         uint16_t max_nb_tx_queues = UINT16_MAX;
2226
2227         dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2228
2229         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2230                         internals->candidate_max_rx_pktlen :
2231                         ETHER_MAX_JUMBO_FRAME_LEN;
2232
2233         /* Max number of tx/rx queues that the bonded device can support is the
2234          * minimum values of the bonded slaves, as all slaves must be capable
2235          * of supporting the same number of tx/rx queues.
2236          */
2237         if (internals->slave_count > 0) {
2238                 struct rte_eth_dev_info slave_info;
2239                 uint16_t idx;
2240
2241                 for (idx = 0; idx < internals->slave_count; idx++) {
2242                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2243                                         &slave_info);
2244
2245                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2246                                 max_nb_rx_queues = slave_info.max_rx_queues;
2247
2248                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2249                                 max_nb_tx_queues = slave_info.max_tx_queues;
2250                 }
2251         }
2252
2253         dev_info->max_rx_queues = max_nb_rx_queues;
2254         dev_info->max_tx_queues = max_nb_tx_queues;
2255
2256         memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2257                sizeof(dev_info->default_rxconf));
2258         memcpy(&dev_info->default_txconf, &internals->default_txconf,
2259                sizeof(dev_info->default_txconf));
2260
2261         memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
2262                sizeof(dev_info->rx_desc_lim));
2263         memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
2264                sizeof(dev_info->tx_desc_lim));
2265
2266         /**
2267          * If dedicated hw queues enabled for link bonding device in LACP mode
2268          * then we need to reduce the maximum number of data path queues by 1.
2269          */
2270         if (internals->mode == BONDING_MODE_8023AD &&
2271                 internals->mode4.dedicated_queues.enabled == 1) {
2272                 dev_info->max_rx_queues--;
2273                 dev_info->max_tx_queues--;
2274         }
2275
2276         dev_info->min_rx_bufsize = 0;
2277
2278         dev_info->rx_offload_capa = internals->rx_offload_capa;
2279         dev_info->tx_offload_capa = internals->tx_offload_capa;
2280         dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2281         dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2282         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2283
2284         dev_info->reta_size = internals->reta_size;
2285 }
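
/*
 * Illustrative sketch, not compiled: the queue limits reported above are
 * the minimum over all slaves, so queue sizing is best done after the last
 * slave has been added; "bond_port" is hypothetical.
 */
#if 0
static uint16_t
bond_max_rxq_example(uint16_t bond_port)
{
        struct rte_eth_dev_info info;

        rte_eth_dev_info_get(bond_port, &info);
        /* already accounts for the queue pair reserved when mode 4
         * dedicated queues are enabled */
        return info.max_rx_queues;
}
#endif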
2286
2287 static int
2288 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2289 {
2290         int res;
2291         uint16_t i;
2292         struct bond_dev_private *internals = dev->data->dev_private;
2293
2294         /* don't do this while a slave is being added */
2295         rte_spinlock_lock(&internals->lock);
2296
2297         if (on)
2298                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2299         else
2300                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2301
2302         for (i = 0; i < internals->slave_count; i++) {
2303                 uint16_t port_id = internals->slaves[i].port_id;
2304
2305                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2306                 if (res == -ENOTSUP)
2307                         RTE_BOND_LOG(WARNING,
2308                                      "Setting VLAN filter on slave port %u not supported.",
2309                                      port_id);
2310         }
2311
2312         rte_spinlock_unlock(&internals->lock);
2313         return 0;
2314 }
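
/*
 * Illustrative sketch, not compiled: a VLAN filter set on the bonded port
 * is recorded in the bitmap above and applied to every slave, including
 * slaves added later; "bond_port" and VLAN 100 are hypothetical.
 */
#if 0
static void
bond_vlan_filter_example(uint16_t bond_port)
{
        if (rte_eth_dev_vlan_filter(bond_port, 100, 1) != 0)
                RTE_BOND_LOG(ERR, "failed to add VLAN 100 filter");
}
#endif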
2315
2316 static int
2317 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2318                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2319                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2320 {
2321         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2322                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2323                                         0, dev->data->numa_node);
2324         if (bd_rx_q == NULL)
2325                 return -1;
2326
2327         bd_rx_q->queue_id = rx_queue_id;
2328         bd_rx_q->dev_private = dev->data->dev_private;
2329
2330         bd_rx_q->nb_rx_desc = nb_rx_desc;
2331
2332         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2333         bd_rx_q->mb_pool = mb_pool;
2334
2335         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2336
2337         return 0;
2338 }
2339
2340 static int
2341 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2342                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2343                 const struct rte_eth_txconf *tx_conf)
2344 {
2345         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2346                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2347                                         0, dev->data->numa_node);
2348
2349         if (bd_tx_q == NULL)
2350                 return -1;
2351
2352         bd_tx_q->queue_id = tx_queue_id;
2353         bd_tx_q->dev_private = dev->data->dev_private;
2354
2355         bd_tx_q->nb_tx_desc = nb_tx_desc;
2356         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2357
2358         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2359
2360         return 0;
2361 }
2362
2363 static void
2364 bond_ethdev_rx_queue_release(void *queue)
2365 {
2366         if (queue == NULL)
2367                 return;
2368
2369         rte_free(queue);
2370 }
2371
2372 static void
2373 bond_ethdev_tx_queue_release(void *queue)
2374 {
2375         if (queue == NULL)
2376                 return;
2377
2378         rte_free(queue);
2379 }
2380
2381 static void
2382 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2383 {
2384         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2385         struct bond_dev_private *internals;
2386
2387         /* Default value for polling slave found is true as we don't want to
2388          * disable the polling thread if we cannot get the lock */
2389         int i, polling_slave_found = 1;
2390
2391         if (cb_arg == NULL)
2392                 return;
2393
2394         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2395         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2396
2397         if (!bonded_ethdev->data->dev_started ||
2398                 !internals->link_status_polling_enabled)
2399                 return;
2400
2401         /* If device is currently being configured then don't check slaves link
2402          * status, wait until next period */
2403         if (rte_spinlock_trylock(&internals->lock)) {
2404                 if (internals->slave_count > 0)
2405                         polling_slave_found = 0;
2406
2407                 for (i = 0; i < internals->slave_count; i++) {
2408                         if (!internals->slaves[i].link_status_poll_enabled)
2409                                 continue;
2410
2411                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2412                         polling_slave_found = 1;
2413
2414                         /* Update slave link status */
2415                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2416                                         internals->slaves[i].link_status_wait_to_complete);
2417
2418                         /* if link status has changed since last checked then call lsc
2419                          * event callback */
2420                         if (slave_ethdev->data->dev_link.link_status !=
2421                                         internals->slaves[i].last_link_status) {
2422                                 internals->slaves[i].last_link_status =
2423                                                 slave_ethdev->data->dev_link.link_status;
2424
2425                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2426                                                 RTE_ETH_EVENT_INTR_LSC,
2427                                                 &bonded_ethdev->data->port_id,
2428                                                 NULL);
2429                         }
2430                 }
2431                 rte_spinlock_unlock(&internals->lock);
2432         }
2433
2434         if (polling_slave_found)
2435                 /* Set alarm to continue monitoring link status of slave ethdev's */
2436                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2437                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2438 }
2439
2440 static int
2441 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2442 {
2443         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2444
2445         struct bond_dev_private *bond_ctx;
2446         struct rte_eth_link slave_link;
2447
2448         uint32_t idx;
2449
2450         bond_ctx = ethdev->data->dev_private;
2451
2452         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2453
2454         if (ethdev->data->dev_started == 0 ||
2455                         bond_ctx->active_slave_count == 0) {
2456                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2457                 return 0;
2458         }
2459
2460         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2461
2462         if (wait_to_complete)
2463                 link_update = rte_eth_link_get;
2464         else
2465                 link_update = rte_eth_link_get_nowait;
2466
2467         switch (bond_ctx->mode) {
2468         case BONDING_MODE_BROADCAST:
2469                 /**
2470                  * Setting link speed to UINT32_MAX to ensure we pick up the
2471                  * value of the first active slave
2472                  */
2473                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2474
2475                 /**
2476                  * link speed is minimum value of all the slaves link speed as
2477                  * packet loss will occur on this slave if transmission at rates
2478                  * greater than this are attempted
2479                  */
2480                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2481                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2482
2483                         if (slave_link.link_speed <
2484                                         ethdev->data->dev_link.link_speed)
2485                                 ethdev->data->dev_link.link_speed =
2486                                                 slave_link.link_speed;
2487                 }
2488                 break;
2489         case BONDING_MODE_ACTIVE_BACKUP:
2490                 /* Current primary slave */
2491                 link_update(bond_ctx->current_primary_port, &slave_link);
2492
2493                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2494                 break;
2495         case BONDING_MODE_8023AD:
2496                 ethdev->data->dev_link.link_autoneg =
2497                                 bond_ctx->mode4.slave_link.link_autoneg;
2498                 ethdev->data->dev_link.link_duplex =
2499                                 bond_ctx->mode4.slave_link.link_duplex;
2500                 /* fall through to update link speed */
2501         case BONDING_MODE_ROUND_ROBIN:
2502         case BONDING_MODE_BALANCE:
2503         case BONDING_MODE_TLB:
2504         case BONDING_MODE_ALB:
2505         default:
2506                 /**
2507                  * In theses mode the maximum theoretical link speed is the sum
2508                  * In these modes the maximum theoretical link speed is the sum
2509                  * of all the slaves' link speeds
2510                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2511
2512                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2513                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2514
2515                         ethdev->data->dev_link.link_speed +=
2516                                         slave_link.link_speed;
2517                 }
2518         }
2519
2520
2521         return 0;
2522 }
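
/*
 * Illustrative sketch, not compiled: reading the aggregate link computed
 * above. A balance-mode bond over two 10G slaves reports 20G, while
 * broadcast mode reports the slowest slave; "bond_port" is hypothetical.
 */
#if 0
static void
bond_link_example(uint16_t bond_port)
{
        struct rte_eth_link link;

        rte_eth_link_get_nowait(bond_port, &link);
        RTE_BOND_LOG(INFO, "bond link %s, %u Mbps",
                        link.link_status == ETH_LINK_UP ? "up" : "down",
                        link.link_speed);
}
#endif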
2523
2524
2525 static int
2526 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2527 {
2528         struct bond_dev_private *internals = dev->data->dev_private;
2529         struct rte_eth_stats slave_stats;
2530         int i, j;
2531
2532         for (i = 0; i < internals->slave_count; i++) {
2533                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2534
2535                 stats->ipackets += slave_stats.ipackets;
2536                 stats->opackets += slave_stats.opackets;
2537                 stats->ibytes += slave_stats.ibytes;
2538                 stats->obytes += slave_stats.obytes;
2539                 stats->imissed += slave_stats.imissed;
2540                 stats->ierrors += slave_stats.ierrors;
2541                 stats->oerrors += slave_stats.oerrors;
2542                 stats->rx_nombuf += slave_stats.rx_nombuf;
2543
2544                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2545                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2546                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2547                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2548                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2549                         stats->q_errors[j] += slave_stats.q_errors[j];
2550                 }
2551
2552         }
2553
2554         return 0;
2555 }
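
/*
 * Illustrative sketch, not compiled: bonded-port statistics are the sums
 * computed above; per-slave counters remain available directly on the
 * slave ports. "bond_port" is hypothetical.
 */
#if 0
static void
bond_stats_example(uint16_t bond_port)
{
        struct rte_eth_stats stats;

        if (rte_eth_stats_get(bond_port, &stats) == 0)
                RTE_BOND_LOG(INFO, "rx %" PRIu64 " tx %" PRIu64,
                                stats.ipackets, stats.opackets);
}
#endif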
2556
2557 static void
2558 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2559 {
2560         struct bond_dev_private *internals = dev->data->dev_private;
2561         int i;
2562
2563         for (i = 0; i < internals->slave_count; i++)
2564                 rte_eth_stats_reset(internals->slaves[i].port_id);
2565 }
2566
2567 static void
2568 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2569 {
2570         struct bond_dev_private *internals = eth_dev->data->dev_private;
2571         int i;
2572
2573         internals->promiscuous_en = 1;
2574
2575         switch (internals->mode) {
2576         /* Promiscuous mode is propagated to all slaves */
2577         case BONDING_MODE_ROUND_ROBIN:
2578         case BONDING_MODE_BALANCE:
2579         case BONDING_MODE_BROADCAST:
2580                 for (i = 0; i < internals->slave_count; i++)
2581                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2582                 break;
2583         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2584         case BONDING_MODE_8023AD:
2585                 break;
2586         /* Promiscuous mode is propagated only to primary slave */
2587         case BONDING_MODE_ACTIVE_BACKUP:
2588         case BONDING_MODE_TLB:
2589         case BONDING_MODE_ALB:
2590         default:
2591                 rte_eth_promiscuous_enable(internals->current_primary_port);
2592         }
2593 }
2594
2595 static void
2596 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2597 {
2598         struct bond_dev_private *internals = dev->data->dev_private;
2599         int i;
2600
2601         internals->promiscuous_en = 0;
2602
2603         switch (internals->mode) {
2604         /* Promiscuous mode is propagated to all slaves */
2605         case BONDING_MODE_ROUND_ROBIN:
2606         case BONDING_MODE_BALANCE:
2607         case BONDING_MODE_BROADCAST:
2608                 for (i = 0; i < internals->slave_count; i++)
2609                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2610                 break;
2611         /* In mode 4, promiscuous mode is managed when a slave is added/removed */
2612         case BONDING_MODE_8023AD:
2613                 break;
2614         /* Promiscuous mode is propagated only to primary slave */
2615         case BONDING_MODE_ACTIVE_BACKUP:
2616         case BONDING_MODE_TLB:
2617         case BONDING_MODE_ALB:
2618         default:
2619                 rte_eth_promiscuous_disable(internals->current_primary_port);
2620         }
2621 }
2622
2623 static void
2624 bond_ethdev_delayed_lsc_propagation(void *arg)
2625 {
2626         if (arg == NULL)
2627                 return;
2628
2629         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2630                         RTE_ETH_EVENT_INTR_LSC, NULL);
2631 }
2632
2633 int
2634 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2635                 void *param, void *ret_param __rte_unused)
2636 {
2637         struct rte_eth_dev *bonded_eth_dev;
2638         struct bond_dev_private *internals;
2639         struct rte_eth_link link;
2640         int rc = -1;
2641
2642         int i, valid_slave = 0;
2643         uint16_t active_pos;
2644         uint8_t lsc_flag = 0;
2645
2646         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2647                 return rc;
2648
2649         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2650
2651         if (check_for_bonded_ethdev(bonded_eth_dev))
2652                 return rc;
2653
2654         internals = bonded_eth_dev->data->dev_private;
2655
2656         /* If the device isn't started don't handle interrupts */
2657         if (!bonded_eth_dev->data->dev_started)
2658                 return rc;
2659
2660         /* verify that port_id is a valid slave of bonded port */
2661         for (i = 0; i < internals->slave_count; i++) {
2662                 if (internals->slaves[i].port_id == port_id) {
2663                         valid_slave = 1;
2664                         break;
2665                 }
2666         }
2667
2668         if (!valid_slave)
2669                 return rc;
2670
2671         /* Synchronize lsc callback parallel calls either by real link event
2672          * from the slaves PMDs or by the bonding PMD itself.
2673          */
2674         rte_spinlock_lock(&internals->lsc_lock);
2675
2676         /* Search for port in active port list */
2677         active_pos = find_slave_by_id(internals->active_slaves,
2678                         internals->active_slave_count, port_id);
2679
2680         rte_eth_link_get_nowait(port_id, &link);
2681         if (link.link_status) {
2682                 if (active_pos < internals->active_slave_count)
2683                         goto link_update;
2684
2685                 /* If there are no active slave ports, make this the primary port */
2686                 if (internals->active_slave_count < 1) {
2687                         /* If first active slave, then change link status */
2688                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2689                         internals->current_primary_port = port_id;
2690                         lsc_flag = 1;
2691
2692                         mac_address_slaves_update(bonded_eth_dev);
2693                 }
2694
2695                 /* Check link state properties if the bonded link is up */
2696                 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2697                         if (link_properties_valid(bonded_eth_dev, &link) != 0)
2698                                 RTE_BOND_LOG(ERR, "Invalid link properties "
2699                                              "for slave %d in bonding mode %d",
2700                                              port_id, internals->mode);
2701                 } else {
2702                         /* inherit slave link properties */
2703                         link_properties_set(bonded_eth_dev, &link);
2704                 }
2705
2706                 activate_slave(bonded_eth_dev, port_id);
2707
2708                 /* If user has defined the primary port then default to using it */
2709                 if (internals->user_defined_primary_port &&
2710                                 internals->primary_port == port_id)
2711                         bond_ethdev_primary_set(internals, port_id);
2712         } else {
2713                 if (active_pos == internals->active_slave_count)
2714                         goto link_update;
2715
2716                 /* Remove from active slave list */
2717                 deactivate_slave(bonded_eth_dev, port_id);
2718
2719                 if (internals->active_slave_count < 1)
2720                         lsc_flag = 1;
2721
2722                 /* Update primary id: take the first active slave from the list,
2723                  * or fall back to the configured primary port if none is available */
2724                 if (port_id == internals->current_primary_port) {
2725                         if (internals->active_slave_count > 0)
2726                                 bond_ethdev_primary_set(internals,
2727                                                 internals->active_slaves[0]);
2728                         else
2729                                 internals->current_primary_port = internals->primary_port;
2730                 }
2731         }
2732
2733 link_update:
2734         /**
2735          * Update bonded device link properties after any change to active
2736          * slaves
2737          */
2738         bond_ethdev_link_update(bonded_eth_dev, 0);
2739
2740         if (lsc_flag) {
2741                 /* Cancel any possible outstanding interrupts if delays are enabled */
2742                 if (internals->link_up_delay_ms > 0 ||
2743                         internals->link_down_delay_ms > 0)
2744                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2745                                         bonded_eth_dev);
2746
2747                 if (bonded_eth_dev->data->dev_link.link_status) {
2748                         if (internals->link_up_delay_ms > 0)
2749                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2750                                                 bond_ethdev_delayed_lsc_propagation,
2751                                                 (void *)bonded_eth_dev);
2752                         else
2753                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2754                                                 RTE_ETH_EVENT_INTR_LSC,
2755                                                 NULL);
2756
2757                 } else {
2758                         if (internals->link_down_delay_ms > 0)
2759                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2760                                                 bond_ethdev_delayed_lsc_propagation,
2761                                                 (void *)bonded_eth_dev);
2762                         else
2763                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2764                                                 RTE_ETH_EVENT_INTR_LSC,
2765                                                 NULL);
2766                 }
2767         }
2768
2769         rte_spinlock_unlock(&internals->lsc_lock);
2770
2771         return 0;
2772 }
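
/*
 * Illustrative sketch (assumed registration path, mirroring what the
 * bonding library does when a slave is added): the LSC callback above
 * is registered on each slave port with a pointer to the bonded port
 * id as its parameter, which the callback dereferences on entry.
 *
 *	uint16_t bond_port_id;	// hypothetical bonded port id
 *
 *	rte_eth_dev_callback_register(slave_port_id,
 *			RTE_ETH_EVENT_INTR_LSC,
 *			bond_ethdev_lsc_event_callback,
 *			&bond_port_id);
 */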
2773
2774 static int
2775 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2776                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2777 {
2778         unsigned i, j;
2779         int result = 0;
2780         int slave_reta_size;
2781         unsigned reta_count;
2782         struct bond_dev_private *internals = dev->data->dev_private;
2783
2784         if (reta_size != internals->reta_size)
2785                 return -EINVAL;
2786
2787         /* Copy RETA table */
2788         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2789
2790         for (i = 0; i < reta_count; i++) {
2791                 internals->reta_conf[i].mask = reta_conf[i].mask;
2792                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2793                         if ((reta_conf[i].mask >> j) & 0x01)
2794                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2795         }
2796
2797         /* Fill rest of array */
2798         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2799                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2800                                 sizeof(internals->reta_conf[0]) * reta_count);
2801
2802         /* Propagate RETA over slaves */
2803         for (i = 0; i < internals->slave_count; i++) {
2804                 slave_reta_size = internals->slaves[i].reta_size;
2805                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2806                                 &internals->reta_conf[0], slave_reta_size);
2807                 if (result < 0)
2808                         return result;
2809         }
2810
2811         return 0;
2812 }
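
/*
 * Illustrative sketch (hypothetical application code): programming an
 * even round-robin RETA on the bonded port. The handler above mirrors
 * the entries into internals->reta_conf, replicates them across the
 * whole table, and re-applies the result on every slave at that
 * slave's own RETA size.
 *
 *	struct rte_eth_rss_reta_entry64 reta[reta_size / RTE_RETA_GROUP_SIZE];
 *	uint16_t k;
 *
 *	for (k = 0; k < reta_size; k++) {
 *		reta[k / RTE_RETA_GROUP_SIZE].mask = ~0ULL;
 *		reta[k / RTE_RETA_GROUP_SIZE].reta[k % RTE_RETA_GROUP_SIZE] =
 *				k % nb_rx_queues;
 *	}
 *	rte_eth_dev_rss_reta_update(bond_port, reta, reta_size);
 */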
2813
2814 static int
2815 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2816                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2817 {
2818         int i, j;
2819         struct bond_dev_private *internals = dev->data->dev_private;
2820
2821         if (reta_size != internals->reta_size)
2822                 return -EINVAL;
2823
2824         /* Copy RETA table */
2825         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2826                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2827                         if ((reta_conf[i].mask >> j) & 0x01)
2828                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2829
2830         return 0;
2831 }
2832
2833 static int
2834 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2835                 struct rte_eth_rss_conf *rss_conf)
2836 {
2837         int i, result = 0;
2838         struct bond_dev_private *internals = dev->data->dev_private;
2839         struct rte_eth_rss_conf bond_rss_conf;
2840
2841         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2842
2843         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2844
2845         if (bond_rss_conf.rss_hf != 0)
2846                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2847
2848         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2849                         sizeof(internals->rss_key)) {
2850                 if (bond_rss_conf.rss_key_len == 0)
2851                         bond_rss_conf.rss_key_len = 40;
2852                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2853                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2854                                 internals->rss_key_len);
2855         }
2856
2857         for (i = 0; i < internals->slave_count; i++) {
2858                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2859                                 &bond_rss_conf);
2860                 if (result < 0)
2861                         return result;
2862         }
2863
2864         return 0;
2865 }
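
/*
 * Illustrative sketch (hypothetical application code): updating the
 * RSS hash configuration on the bonded port. The handler above first
 * masks rss_hf against the hash types supported by all slaves, then
 * pushes the resulting configuration to each slave.
 *
 *	static uint8_t key[40];		// assumed 40-byte RSS key
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *	};
 *
 *	rte_eth_dev_rss_hash_update(bond_port, &conf);
 */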
2866
2867 static int
2868 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2869                 struct rte_eth_rss_conf *rss_conf)
2870 {
2871         struct bond_dev_private *internals = dev->data->dev_private;
2872
2873         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2874         rss_conf->rss_key_len = internals->rss_key_len;
2875         if (rss_conf->rss_key)
2876                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2877
2878         return 0;
2879 }
2880
2881 static int
2882 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2883 {
2884         struct rte_eth_dev *slave_eth_dev;
2885         struct bond_dev_private *internals = dev->data->dev_private;
2886         int ret, i;
2887
2888         rte_spinlock_lock(&internals->lock);
2889
2890         for (i = 0; i < internals->slave_count; i++) {
2891                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2892                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2893                         rte_spinlock_unlock(&internals->lock);
2894                         return -ENOTSUP;
2895                 }
2896         }
2897         for (i = 0; i < internals->slave_count; i++) {
2898                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2899                 if (ret < 0) {
2900                         rte_spinlock_unlock(&internals->lock);
2901                         return ret;
2902                 }
2903         }
2904
2905         rte_spinlock_unlock(&internals->lock);
2906         return 0;
2907 }
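
/*
 * Illustrative sketch (hypothetical application code): an MTU change
 * on the bonded port succeeds only if every slave implements mtu_set;
 * the handler above verifies that first and only then applies the new
 * MTU to each slave under the device lock.
 *
 *	if (rte_eth_dev_set_mtu(bond_port, 9000) != 0)
 *		rte_exit(EXIT_FAILURE, "MTU rejected by a slave\n");
 */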
2908
2909 static int
2910 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2911 {
2912         if (mac_address_set(dev, addr)) {
2913                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2914                 return -EINVAL;
2915         }
2916
2917         return 0;
2918 }
2919
2920 static int
2921 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2922                  enum rte_filter_type type, enum rte_filter_op op, void *arg)
2923 {
2924         if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2925                 *(const void **)arg = &bond_flow_ops;
2926                 return 0;
2927         }
2928         return -ENOTSUP;
2929 }
2930
2931 static int
2932 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2933                                 __rte_unused uint32_t index, uint32_t vmdq)
2934 {
2935         struct rte_eth_dev *slave_eth_dev;
2936         struct bond_dev_private *internals = dev->data->dev_private;
2937         int ret, i;
2938
2939         rte_spinlock_lock(&internals->lock);
2940
2941         for (i = 0; i < internals->slave_count; i++) {
2942                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2943                 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2944                          *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2945                         ret = -ENOTSUP;
2946                         goto end;
2947                 }
2948         }
2949
2950         for (i = 0; i < internals->slave_count; i++) {
2951                 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2952                                 mac_addr, vmdq);
2953                 if (ret < 0) {
2954                         /* rollback */
2955                         for (i--; i >= 0; i--)
2956                                 rte_eth_dev_mac_addr_remove(
2957                                         internals->slaves[i].port_id, mac_addr);
2958                         goto end;
2959                 }
2960         }
2961
2962         ret = 0;
2963 end:
2964         rte_spinlock_unlock(&internals->lock);
2965         return ret;
2966 }
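
/*
 * Illustrative sketch (hypothetical application code): adding a
 * secondary MAC address to the bonded port. The handler above adds the
 * address on every slave and, if any slave fails, removes it again
 * from the slaves already programmed.
 *
 *	struct ether_addr extra = {
 *		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
 *	};
 *
 *	rte_eth_dev_mac_addr_add(bond_port, &extra, 0);
 */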
2967
2968 static void
2969 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2970 {
2971         struct rte_eth_dev *slave_eth_dev;
2972         struct bond_dev_private *internals = dev->data->dev_private;
2973         int i;
2974
2975         rte_spinlock_lock(&internals->lock);
2976
2977         for (i = 0; i < internals->slave_count; i++) {
2978                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2979                 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
2980                         goto end;
2981         }
2982
2983         struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
2984
2985         for (i = 0; i < internals->slave_count; i++)
2986                 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
2987                                 mac_addr);
2988
2989 end:
2990         rte_spinlock_unlock(&internals->lock);
2991 }
2992
2993 const struct eth_dev_ops default_dev_ops = {
2994         .dev_start            = bond_ethdev_start,
2995         .dev_stop             = bond_ethdev_stop,
2996         .dev_close            = bond_ethdev_close,
2997         .dev_configure        = bond_ethdev_configure,
2998         .dev_infos_get        = bond_ethdev_info,
2999         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
3000         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
3001         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
3002         .rx_queue_release     = bond_ethdev_rx_queue_release,
3003         .tx_queue_release     = bond_ethdev_tx_queue_release,
3004         .link_update          = bond_ethdev_link_update,
3005         .stats_get            = bond_ethdev_stats_get,
3006         .stats_reset          = bond_ethdev_stats_reset,
3007         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
3008         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
3009         .reta_update          = bond_ethdev_rss_reta_update,
3010         .reta_query           = bond_ethdev_rss_reta_query,
3011         .rss_hash_update      = bond_ethdev_rss_hash_update,
3012         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3013         .mtu_set              = bond_ethdev_mtu_set,
3014         .mac_addr_set         = bond_ethdev_mac_address_set,
3015         .mac_addr_add         = bond_ethdev_mac_addr_add,
3016         .mac_addr_remove      = bond_ethdev_mac_addr_remove,
3017         .filter_ctrl          = bond_filter_ctrl
3018 };
3019
3020 static int
3021 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3022 {
3023         const char *name = rte_vdev_device_name(dev);
3024         uint8_t socket_id = dev->device.numa_node;
3025         struct bond_dev_private *internals = NULL;
3026         struct rte_eth_dev *eth_dev = NULL;
3027         uint32_t vlan_filter_bmp_size;
3028
3029         /* now do all data allocation - for eth_dev structure, dummy pci driver
3030          * and internal (private) data
3031          */
3032
3033         /* reserve an ethdev entry */
3034         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3035         if (eth_dev == NULL) {
3036                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3037                 goto err;
3038         }
3039
3040         internals = eth_dev->data->dev_private;
3041         eth_dev->data->nb_rx_queues = (uint16_t)1;
3042         eth_dev->data->nb_tx_queues = (uint16_t)1;
3043
3044         /* Allocate memory for storing MAC addresses */
3045         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3046                         BOND_MAX_MAC_ADDRS, 0, socket_id);
3047         if (eth_dev->data->mac_addrs == NULL) {
3048                 RTE_BOND_LOG(ERR,
3049                              "Failed to allocate %u bytes needed to store MAC addresses",
3050                              ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3051                 goto err;
3052         }
3053
3054         eth_dev->dev_ops = &default_dev_ops;
3055         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3056
3057         rte_spinlock_init(&internals->lock);
3058         rte_spinlock_init(&internals->lsc_lock);
3059
3060         internals->port_id = eth_dev->data->port_id;
3061         internals->mode = BONDING_MODE_INVALID;
3062         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3063         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3064         internals->burst_xmit_hash = burst_xmit_l2_hash;
3065         internals->user_defined_mac = 0;
3066
3067         internals->link_status_polling_enabled = 0;
3068
3069         internals->link_status_polling_interval_ms =
3070                 DEFAULT_POLLING_INTERVAL_10_MS;
3071         internals->link_down_delay_ms = 0;
3072         internals->link_up_delay_ms = 0;
3073
3074         internals->slave_count = 0;
3075         internals->active_slave_count = 0;
3076         internals->rx_offload_capa = 0;
3077         internals->tx_offload_capa = 0;
3078         internals->rx_queue_offload_capa = 0;
3079         internals->tx_queue_offload_capa = 0;
3080         internals->candidate_max_rx_pktlen = 0;
3081         internals->max_rx_pktlen = 0;
3082
3083         /* Initially allow any RSS offload type to be chosen */
3084         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3085
3086         memset(&internals->default_rxconf, 0,
3087                sizeof(internals->default_rxconf));
3088         memset(&internals->default_txconf, 0,
3089                sizeof(internals->default_txconf));
3090
3091         memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3092         memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3093
3094         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3095         memset(internals->slaves, 0, sizeof(internals->slaves));
3096
3097         TAILQ_INIT(&internals->flow_list);
3098         internals->flow_isolated_valid = 0;
3099
3100         /* Set mode 4 default configuration */
3101         bond_mode_8023ad_setup(eth_dev, NULL);
3102         if (bond_ethdev_mode_set(eth_dev, mode)) {
3103                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3104                                  eth_dev->data->port_id, mode);
3105                 goto err;
3106         }
3107
3108         vlan_filter_bmp_size =
3109                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3110         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3111                                                    RTE_CACHE_LINE_SIZE);
3112         if (internals->vlan_filter_bmpmem == NULL) {
3113                 RTE_BOND_LOG(ERR,
3114                              "Failed to allocate vlan bitmap for bonded device %u",
3115                              eth_dev->data->port_id);
3116                 goto err;
3117         }
3118
3119         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3120                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3121         if (internals->vlan_filter_bmp == NULL) {
3122                 RTE_BOND_LOG(ERR,
3123                              "Failed to init vlan bitmap for bonded device %u",
3124                              eth_dev->data->port_id);
3125                 rte_free(internals->vlan_filter_bmpmem);
3126                 goto err;
3127         }
3128
3129         return eth_dev->data->port_id;
3130
3131 err:
3132         rte_free(internals);
3133         if (eth_dev != NULL)
3134                 eth_dev->data->dev_private = NULL;
3135         rte_eth_dev_release_port(eth_dev);
3136         return -1;
3137 }
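
/*
 * Illustrative sketch (hypothetical application code): besides the
 * --vdev path handled by bond_probe() below, a bonded device can be
 * created at run time through the bonding API, which also lands in
 * bond_alloc().
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 *	if (bond_port < 0 ||
 *			rte_eth_bond_slave_add(bond_port, slave_port) != 0)
 *		rte_exit(EXIT_FAILURE, "bonding setup failed\n");
 */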
3138
3139 static int
3140 bond_probe(struct rte_vdev_device *dev)
3141 {
3142         const char *name;
3143         struct bond_dev_private *internals;
3144         struct rte_kvargs *kvlist;
3145         uint8_t bonding_mode, socket_id;
3146         int  arg_count, port_id;
3147         uint8_t agg_mode;
3148         struct rte_eth_dev *eth_dev;
3149
3150         if (!dev)
3151                 return -EINVAL;
3152
3153         name = rte_vdev_device_name(dev);
3154         RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3155
3156         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3157                 eth_dev = rte_eth_dev_attach_secondary(name);
3158                 if (!eth_dev) {
3159                         RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3160                         return -1;
3161                 }
3162                 /* TODO: request info from primary to set up Rx and Tx */
3163                 eth_dev->dev_ops = &default_dev_ops;
3164                 eth_dev->device = &dev->device;
3165                 rte_eth_dev_probing_finish(eth_dev);
3166                 return 0;
3167         }
3168
3169         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3170                 pmd_bond_init_valid_arguments);
3171         if (kvlist == NULL)
3172                 return -1;
3173
3174         /* Parse link bonding mode */
3175         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3176                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3177                                 &bond_ethdev_parse_slave_mode_kvarg,
3178                                 &bonding_mode) != 0) {
3179                         RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3180                                         name);
3181                         goto parse_error;
3182                 }
3183         } else {
3184                 RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3185                                 "device %s", name);
3186                 goto parse_error;
3187         }
3188
3189         /* Parse socket id to create bonding device on */
3190         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3191         if (arg_count == 1) {
3192                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3193                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3194                                 != 0) {
3195                         RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3196                                         "bonded device %s", name);
3197                         goto parse_error;
3198                 }
3199         } else if (arg_count > 1) {
3200                 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3201                                 "bonded device %s", name);
3202                 goto parse_error;
3203         } else {
3204                 socket_id = rte_socket_id();
3205         }
3206
3207         dev->device.numa_node = socket_id;
3208
3209         /* Create link bonding eth device */
3210         port_id = bond_alloc(dev, bonding_mode);
3211         if (port_id < 0) {
3212                 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3213                                 "socket %u.", name, bonding_mode, socket_id);
3214                 goto parse_error;
3215         }
3216         internals = rte_eth_devices[port_id].data->dev_private;
3217         internals->kvlist = kvlist;
3218
3219         rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3220
3221         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3222                 if (rte_kvargs_process(kvlist,
3223                                 PMD_BOND_AGG_MODE_KVARG,
3224                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3225                                 &agg_mode) != 0) {
3226                         RTE_BOND_LOG(ERR,
3227                                         "Failed to parse agg selection mode for bonded device %s",
3228                                         name);
3229                         goto parse_error;
3230                 }
3231
3232                 if (internals->mode == BONDING_MODE_8023AD)
3233                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3234                                         agg_mode);
3235         } else {
3236                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3237         }
3238
3239         RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3240                         "socket %u.", name, port_id, bonding_mode, socket_id);
3241         return 0;
3242
3243 parse_error:
3244         rte_kvargs_free(kvlist);
3245
3246         return -1;
3247 }
3248
3249 static int
3250 bond_remove(struct rte_vdev_device *dev)
3251 {
3252         struct rte_eth_dev *eth_dev;
3253         struct bond_dev_private *internals;
3254         const char *name;
3255
3256         if (!dev)
3257                 return -EINVAL;
3258
3259         name = rte_vdev_device_name(dev);
3260         RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3261
3262         /* now free all data allocation - for eth_dev structure,
3263          * dummy pci driver and internal (private) data
3264          */
3265
3266         /* find an ethdev entry */
3267         eth_dev = rte_eth_dev_allocated(name);
3268         if (eth_dev == NULL)
3269                 return -ENODEV;
3270
3271         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3272                 return rte_eth_dev_release_port(eth_dev);
3273
3274         RTE_ASSERT(eth_dev->device == &dev->device);
3275
3276         internals = eth_dev->data->dev_private;
3277         if (internals->slave_count != 0)
3278                 return -EBUSY;
3279
3280         if (eth_dev->data->dev_started == 1) {
3281                 bond_ethdev_stop(eth_dev);
3282                 bond_ethdev_close(eth_dev);
3283         }
3284
3285         eth_dev->dev_ops = NULL;
3286         eth_dev->rx_pkt_burst = NULL;
3287         eth_dev->tx_pkt_burst = NULL;
3288
3289         internals = eth_dev->data->dev_private;
3290         /* Try to release the mempool used in mode 6. If the bonded
3291          * device is not in mode 6, freeing the NULL pointer is harmless.
3292          */
3293         rte_mempool_free(internals->mode6.mempool);
3294         rte_bitmap_free(internals->vlan_filter_bmp);
3295         rte_free(internals->vlan_filter_bmpmem);
3296
3297         rte_eth_dev_release_port(eth_dev);
3298
3299         return 0;
3300 }
3301
3302 /* This part resolves the slave port ids after all the other pdevs and vdevs
3303  * have been allocated */
3304 static int
3305 bond_ethdev_configure(struct rte_eth_dev *dev)
3306 {
3307         const char *name = dev->device->name;
3308         struct bond_dev_private *internals = dev->data->dev_private;
3309         struct rte_kvargs *kvlist = internals->kvlist;
3310         int arg_count;
3311         uint16_t port_id = dev - rte_eth_devices;
3312         uint8_t agg_mode;
3313
3314         static const uint8_t default_rss_key[40] = {
3315                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3316                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3317                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3318                 0xBE, 0xAC, 0x01, 0xFA
3319         };
3320
3321         unsigned i, j;
3322
3323         /*
3324          * If RSS is enabled, fill table with default values and
3325          * set the key to the value specified in the port RSS configuration.
3326          * Fall back to the default RSS key if no key is specified.
3327          */
3328         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3329                 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3330                         internals->rss_key_len =
3331                                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3332                         memcpy(internals->rss_key,
3333                                dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3334                                internals->rss_key_len);
3335                 } else {
3336                         internals->rss_key_len = sizeof(default_rss_key);
3337                         memcpy(internals->rss_key, default_rss_key,
3338                                internals->rss_key_len);
3339                 }
3340
3341                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3342                         internals->reta_conf[i].mask = ~0LL;
3343                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3344                                 internals->reta_conf[i].reta[j] =
3345                                                 (i * RTE_RETA_GROUP_SIZE + j) %
3346                                                 dev->data->nb_rx_queues;
3347                 }
3348         }
3349
3350         /* set the max_rx_pktlen */
3351         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3352
3353         /*
3354          * If there is no kvlist, this bonded device was created
3355          * through the bonding API.
3356          */
3357         if (!kvlist)
3358                 return 0;
3359
3360         /* Parse MAC address for bonded device */
3361         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3362         if (arg_count == 1) {
3363                 struct ether_addr bond_mac;
3364
3365                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3366                                        &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3367                         RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3368                                      name);
3369                         return -1;
3370                 }
3371
3372                 /* Set MAC address */
3373                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3374                         RTE_BOND_LOG(ERR,
3375                                      "Failed to set mac address on bonded device %s",
3376                                      name);
3377                         return -1;
3378                 }
3379         } else if (arg_count > 1) {
3380                 RTE_BOND_LOG(ERR,
3381                              "MAC address can be specified only once for bonded device %s",
3382                              name);
3383                 return -1;
3384         }
3385
3386         /* Parse/set balance mode transmit policy */
3387         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3388         if (arg_count == 1) {
3389                 uint8_t xmit_policy;
3390
3391                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3392                                        &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3393                     0) {
3394                         RTE_BOND_LOG(INFO,
3395                                      "Invalid xmit policy specified for bonded device %s",
3396                                      name);
3397                         return -1;
3398                 }
3399
3400                 /* Set balance mode transmit policy */
3401                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3402                         RTE_BOND_LOG(ERR,
3403                                      "Failed to set balance xmit policy on bonded device %s",
3404                                      name);
3405                         return -1;
3406                 }
3407         } else if (arg_count > 1) {
3408                 RTE_BOND_LOG(ERR,
3409                              "Transmit policy can be specified only once for bonded device %s",
3410                              name);
3411                 return -1;
3412         }
3413
3414         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3415                 if (rte_kvargs_process(kvlist,
3416                                        PMD_BOND_AGG_MODE_KVARG,
3417                                        &bond_ethdev_parse_slave_agg_mode_kvarg,
3418                                        &agg_mode) != 0) {
3419                         RTE_BOND_LOG(ERR,
3420                                      "Failed to parse agg selection mode for bonded device %s",
3421                                      name);
3422                 }
3423                 if (internals->mode == BONDING_MODE_8023AD)
3424                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3425                                                               agg_mode);
3426         }
3427
3428         /* Parse/add slave ports to bonded device */
3429         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3430                 struct bond_ethdev_slave_ports slave_ports;
3431                 unsigned i;
3432
3433                 memset(&slave_ports, 0, sizeof(slave_ports));
3434
3435                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3436                                        &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3437                         RTE_BOND_LOG(ERR,
3438                                      "Failed to parse slave ports for bonded device %s",
3439                                      name);
3440                         return -1;
3441                 }
3442
3443                 for (i = 0; i < slave_ports.slave_count; i++) {
3444                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3445                                 RTE_BOND_LOG(ERR,
3446                                              "Failed to add port %d as slave to bonded device %s",
3447                                              slave_ports.slaves[i], name);
3448                         }
3449                 }
3450
3451         } else {
3452                 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3453                 return -1;
3454         }
3455
3456         /* Parse/set primary slave port id */
3457         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3458         if (arg_count == 1) {
3459                 uint16_t primary_slave_port_id;
3460
3461                 if (rte_kvargs_process(kvlist,
3462                                        PMD_BOND_PRIMARY_SLAVE_KVARG,
3463                                        &bond_ethdev_parse_primary_slave_port_id_kvarg,
3464                                        &primary_slave_port_id) < 0) {
3465                         RTE_BOND_LOG(INFO,
3466                                      "Invalid primary slave port id specified for bonded device %s",
3467                                      name);
3468                         return -1;
3469                 }
3470
3471                 /* Set the primary slave port id */
3472                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3473                     != 0) {
3474                         RTE_BOND_LOG(ERR,
3475                                      "Failed to set primary slave port %d on bonded device %s",
3476                                      primary_slave_port_id, name);
3477                         return -1;
3478                 }
3479         } else if (arg_count > 1) {
3480                 RTE_BOND_LOG(INFO,
3481                              "Primary slave can be specified only once for bonded device %s",
3482                              name);
3483                 return -1;
3484         }
3485
3486         /* Parse link status monitor polling interval */
3487         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3488         if (arg_count == 1) {
3489                 uint32_t lsc_poll_interval_ms;
3490
3491                 if (rte_kvargs_process(kvlist,
3492                                        PMD_BOND_LSC_POLL_PERIOD_KVARG,
3493                                        &bond_ethdev_parse_time_ms_kvarg,
3494                                        &lsc_poll_interval_ms) < 0) {
3495                         RTE_BOND_LOG(INFO,
3496                                      "Invalid lsc polling interval value specified for bonded"
3497                                      " device %s", name);
3498                         return -1;
3499                 }
3500
3501                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3502                     != 0) {
3503                         RTE_BOND_LOG(ERR,
3504                                      "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3505                                      lsc_poll_interval_ms, name);
3506                         return -1;
3507                 }
3508         } else if (arg_count > 1) {
3509                 RTE_BOND_LOG(INFO,
3510                              "LSC polling interval can be specified only once for bonded"
3511                              " device %s", name);
3512                 return -1;
3513         }
3514
3515         /* Parse link up interrupt propagation delay */
3516         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3517         if (arg_count == 1) {
3518                 uint32_t link_up_delay_ms;
3519
3520                 if (rte_kvargs_process(kvlist,
3521                                        PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3522                                        &bond_ethdev_parse_time_ms_kvarg,
3523                                        &link_up_delay_ms) < 0) {
3524                         RTE_BOND_LOG(INFO,
3525                                      "Invalid link up propagation delay value specified for"
3526                                      " bonded device %s", name);
3527                         return -1;
3528                 }
3529
3530                 /* Set link up propagation delay */
3531                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3532                     != 0) {
3533                         RTE_BOND_LOG(ERR,
3534                                      "Failed to set link up propagation delay (%u ms) on bonded"
3535                                      " device %s", link_up_delay_ms, name);
3536                         return -1;
3537                 }
3538         } else if (arg_count > 1) {
3539                 RTE_BOND_LOG(INFO,
3540                              "Link up propagation delay can be specified only once for"
3541                              " bonded device %s", name);
3542                 return -1;
3543         }
3544
3545         /* Parse link down interrupt propagation delay */
3546         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3547         if (arg_count == 1) {
3548                 uint32_t link_down_delay_ms;
3549
3550                 if (rte_kvargs_process(kvlist,
3551                                        PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3552                                        &bond_ethdev_parse_time_ms_kvarg,
3553                                        &link_down_delay_ms) < 0) {
3554                         RTE_BOND_LOG(INFO,
3555                                      "Invalid link down propagation delay value specified for"
3556                                      " bonded device %s", name);
3557                         return -1;
3558                 }
3559
3560                 /* Set link down propagation delay */
3561                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3562                     != 0) {
3563                         RTE_BOND_LOG(ERR,
3564                                      "Failed to set link down propagation delay (%u ms) on bonded device %s",
3565                                      link_down_delay_ms, name);
3566                         return -1;
3567                 }
3568         } else if (arg_count > 1) {
3569                 RTE_BOND_LOG(INFO,
3570                              "Link down propagation delay can be specified only once for bonded device %s",
3571                              name);
3572                 return -1;
3573         }
3574
3575         return 0;
3576 }
3577
3578 static struct rte_vdev_driver pmd_bond_drv = {
3579         .probe = bond_probe,
3580         .remove = bond_remove,
3581 };
3582
3583 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3584 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3585
3586 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3587         "slave=<ifc> "
3588         "primary=<ifc> "
3589         "mode=[0-6] "
3590         "xmit_policy=[l2 | l23 | l34] "
3591         "agg_mode=[count | stable | bandwidth] "
3592         "socket_id=<int> "
3593         "mac=<mac addr> "
3594         "lsc_poll_period_ms=<int> "
3595         "up_delay=<int> "
3596         "down_delay=<int>");
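
/*
 * Illustrative sketch (hypothetical command line): the parameters above
 * map to EAL --vdev arguments, e.g. an active-backup bond (mode 1) over
 * two slave ports:
 *
 *	testpmd -l 0-3 -n 4 --vdev \
 *	'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1,primary=0000:02:00.0,socket_id=0'
 */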
3597
3598 int bond_logtype;
3599
3600 RTE_INIT(bond_init_log)
3601 {
3602         bond_logtype = rte_log_register("pmd.net.bond");
3603         if (bond_logtype >= 0)
3604                 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3605 }