drivers/net/bonding/rte_eth_bond_pmd.c (deb_dpdk.git, upstream DPDK 18.02)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20
21 #include "rte_eth_bond.h"
22 #include "rte_eth_bond_private.h"
23 #include "rte_eth_bond_8023ad_private.h"
24
25 #define REORDER_PERIOD_MS 10
26 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
27
28 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
29
30 /* Table for statistics in mode 5 TLB */
31 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
32
33 static inline size_t
34 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
35 {
36         size_t vlan_offset = 0;
37
38         if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
39                 struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
40
41                 vlan_offset = sizeof(struct vlan_hdr);
42                 *proto = vlan_hdr->eth_proto;
43
44                 if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
45                         vlan_hdr = vlan_hdr + 1;
46                         *proto = vlan_hdr->eth_proto;
47                         vlan_offset += sizeof(struct vlan_hdr);
48                 }
49         }
50         return vlan_offset;
51 }
52
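/*
 * Default RX burst: poll each active slave in turn, appending received
 * packets to bufs until nb_pkts have been gathered or every active slave
 * has been polled once.
 */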
53 static uint16_t
54 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
55 {
56         struct bond_dev_private *internals;
57
58         uint16_t num_rx_slave = 0;
59         uint16_t num_rx_total = 0;
60
61         int i;
62
63         /* Cast to structure containing the bonded device's port id and queue id */
64         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
65
66         internals = bd_rx_q->dev_private;
67
68
69         for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
70                 /* Offset of pointer to *bufs increases as packets are received
71                  * from other slaves */
72                 num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
73                                 bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
74                 if (num_rx_slave) {
75                         num_rx_total += num_rx_slave;
76                         nb_pkts -= num_rx_slave;
77                 }
78         }
79
80         return num_rx_total;
81 }
82
83 static uint16_t
84 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
85                 uint16_t nb_pkts)
86 {
87         struct bond_dev_private *internals;
88
89         /* Cast to structure containing the bonded device's port id and queue id */
90         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
91
92         internals = bd_rx_q->dev_private;
93
94         return rte_eth_rx_burst(internals->current_primary_port,
95                         bd_rx_q->queue_id, bufs, nb_pkts);
96 }
97
98 static inline uint8_t
99 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
100 {
101         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
102
103         return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
104                 (ethertype == ether_type_slow_be &&
105                 (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
106 }
107
108 /*****************************************************************************
109  * Flow director's setup for mode 4 optimization
110  */
111
112 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
113         .dst.addr_bytes = { 0 },
114         .src.addr_bytes = { 0 },
115         .type = RTE_BE16(ETHER_TYPE_SLOW),
116 };
117
118 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
119         .dst.addr_bytes = { 0 },
120         .src.addr_bytes = { 0 },
121         .type = 0xFFFF,
122 };
123
124 static struct rte_flow_item flow_item_8023ad[] = {
125         {
126                 .type = RTE_FLOW_ITEM_TYPE_ETH,
127                 .spec = &flow_item_eth_type_8023ad,
128                 .last = NULL,
129                 .mask = &flow_item_eth_mask_type_8023ad,
130         },
131         {
132                 .type = RTE_FLOW_ITEM_TYPE_END,
133                 .spec = NULL,
134                 .last = NULL,
135                 .mask = NULL,
136         }
137 };
138
139 const struct rte_flow_attr flow_attr_8023ad = {
140         .group = 0,
141         .priority = 0,
142         .ingress = 1,
143         .egress = 0,
144         .reserved = 0,
145 };
146
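/*
 * Check that a slave can support the mode 4 dedicated queue optimisation:
 * the LACP redirect flow rule must validate on the slave, and the slave
 * must expose at least as many RX/TX queues as the bonded device is
 * configured with.
 */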
147 int
148 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
149                 uint16_t slave_port) {
150         struct rte_eth_dev_info slave_info;
151         struct rte_flow_error error;
152         struct bond_dev_private *internals = (struct bond_dev_private *)
153                         (bond_dev->data->dev_private);
154
155         const struct rte_flow_action_queue lacp_queue_conf = {
156                 .index = 0,
157         };
158
159         const struct rte_flow_action actions[] = {
160                 {
161                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
162                         .conf = &lacp_queue_conf
163                 },
164                 {
165                         .type = RTE_FLOW_ACTION_TYPE_END,
166                 }
167         };
168
169         int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
170                         flow_item_8023ad, actions, &error);
171         if (ret < 0) {
172                 RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
173                                 __func__, error.message, slave_port,
174                                 internals->mode4.dedicated_queues.rx_qid);
175                 return -1;
176         }
177
178         rte_eth_dev_info_get(slave_port, &slave_info);
179         if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
180                         slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
181                 RTE_BOND_LOG(ERR,
182                         "%s: Slave %d capabilities don't allow allocating additional queues",
183                         __func__, slave_port);
184                 return -1;
185         }
186
187         return 0;
188 }
189
190 int
191 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
192         struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
193         struct bond_dev_private *internals = (struct bond_dev_private *)
194                         (bond_dev->data->dev_private);
195         struct rte_eth_dev_info bond_info;
196         uint16_t idx;
197
198         /* Verify that all slaves in the bonding device support flow director and
199          * have room for the dedicated queues */
199         if (internals->slave_count > 0) {
200                 rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
201
202                 internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
203                 internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
204
205                 for (idx = 0; idx < internals->slave_count; idx++) {
206                         if (bond_ethdev_8023ad_flow_verify(bond_dev,
207                                         internals->slaves[idx].port_id) != 0)
208                                 return -1;
209                 }
210         }
211
212         return 0;
213 }
214
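/*
 * Install the rte_flow rule on a slave that steers ETHER_TYPE_SLOW (LACP)
 * frames to the dedicated RX queue reserved for mode 4 control traffic.
 */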
215 int
216 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
217
218         struct rte_flow_error error;
219         struct bond_dev_private *internals = (struct bond_dev_private *)
220                         (bond_dev->data->dev_private);
221
222         struct rte_flow_action_queue lacp_queue_conf = {
223                 .index = internals->mode4.dedicated_queues.rx_qid,
224         };
225
226         const struct rte_flow_action actions[] = {
227                 {
228                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
229                         .conf = &lacp_queue_conf
230                 },
231                 {
232                         .type = RTE_FLOW_ACTION_TYPE_END,
233                 }
234         };
235
236         internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
237                         &flow_attr_8023ad, flow_item_8023ad, actions, &error);
238         if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
239                 RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
240                                 "(slave_port=%d queue_id=%d)",
241                                 error.message, slave_port,
242                                 internals->mode4.dedicated_queues.rx_qid);
243                 return -1;
244         }
245
246         return 0;
247 }
248
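/*
 * Mode 4 RX path used when dedicated queues are enabled: slow protocol
 * frames are expected to have been steered to the dedicated queue by the
 * flow rules installed above, so the data path simply round-robins over
 * the active slaves, continuing from the slave used on the previous call.
 */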
249 static uint16_t
250 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
251                 uint16_t nb_pkts)
252 {
253         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
254         struct bond_dev_private *internals = bd_rx_q->dev_private;
255         uint16_t num_rx_total = 0;      /* Total number of received packets */
256         uint16_t slaves[RTE_MAX_ETHPORTS];
257         uint16_t slave_count;
258
259         uint16_t i, idx;
260
261         /* Copy slave list to protect against slave up/down changes during rx
262          * bursting */
263         slave_count = internals->active_slave_count;
264         memcpy(slaves, internals->active_slaves,
265                         sizeof(internals->active_slaves[0]) * slave_count);
266
267         for (i = 0, idx = internals->active_slave;
268                         i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
269                 idx = idx % slave_count;
270
271                 /* Read packets from this slave */
272                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
273                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
274         }
275
276         internals->active_slave = idx;
277
278         return num_rx_total;
279 }
280
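/*
 * Mode 4 TX path used when dedicated queues are enabled: hash each packet
 * to one of the slaves currently in DISTRIBUTING state, transmit the
 * per-slave bursts, and move packets that failed to send to the end of
 * bufs so the caller can retry them.
 */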
281 static uint16_t
282 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
283                 uint16_t nb_bufs)
284 {
285         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
286         struct bond_dev_private *internals = bd_tx_q->dev_private;
287
288         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
289         uint16_t slave_count;
290
291         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
292         uint16_t dist_slave_count;
293
294         /* 2-D array to sort mbufs for transmission on each slave into */
295         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
296         /* Number of mbufs for transmission on each slave */
297         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
298         /* Mapping array generated by hash function to map mbufs to slaves */
299         uint16_t bufs_slave_port_idxs[nb_bufs];
300
301         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
302         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
303
304         uint16_t i, j;
305
306         if (unlikely(nb_bufs == 0))
307                 return 0;
308
309         /* Copy slave list to protect against slave up/down changes during tx
310          * bursting */
311         slave_count = internals->active_slave_count;
312         if (unlikely(slave_count < 1))
313                 return 0;
314
315         memcpy(slave_port_ids, internals->active_slaves,
316                         sizeof(slave_port_ids[0]) * slave_count);
317
318
319         dist_slave_count = 0;
320         for (i = 0; i < slave_count; i++) {
321                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
322
323                 if (ACTOR_STATE(port, DISTRIBUTING))
324                         dist_slave_port_ids[dist_slave_count++] =
325                                         slave_port_ids[i];
326         }
327
328         if (unlikely(dist_slave_count < 1))
329                 return 0;
330
331         /*
332          * Populate each slave's mbuf array with the packets to be sent on it,
333          * selecting the output slave using a hash based on the xmit policy
334          */
335         internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
336                         bufs_slave_port_idxs);
337
338         for (i = 0; i < nb_bufs; i++) {
339                 /* Populate slave mbuf arrays with mbufs for that slave. */
340                 uint8_t slave_idx = bufs_slave_port_idxs[i];
341
342                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
343         }
344
345
346         /* Send packet burst on each slave device */
347         for (i = 0; i < dist_slave_count; i++) {
348                 if (slave_nb_bufs[i] == 0)
349                         continue;
350
351                 slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
352                                 bd_tx_q->queue_id, slave_bufs[i],
353                                 slave_nb_bufs[i]);
354
355                 total_tx_count += slave_tx_count;
356
357                 /* If tx burst fails move packets to end of bufs */
358                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
359                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
360                                         slave_tx_count;
361                         total_tx_fail_count += slave_tx_fail_count[i];
362
363                         /*
364                          * Shift bufs to beginning of array to allow reordering
365                          * later
366                          */
367                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
368                                 slave_bufs[i][j] =
369                                         slave_bufs[i][slave_tx_count + j];
370                         }
371                 }
372         }
373
374         /*
375          * If there are tx burst failures we move packets to end of bufs to
376          * preserve the expected PMD behaviour that all packets which failed
377          * transmission are at the end of the input mbuf array
378          */
379         if (unlikely(total_tx_fail_count > 0)) {
380                 int bufs_idx = nb_bufs - total_tx_fail_count;
381
382                 for (i = 0; i < slave_count; i++) {
383                         if (slave_tx_fail_count[i] > 0) {
384                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
385                                         bufs[bufs_idx++] = slave_bufs[i][j];
386                         }
387                 }
388         }
389
390         return total_tx_count;
391 }
392
393
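/*
 * Mode 4 RX path without dedicated queues: receive from each active slave,
 * then filter the burst in software. LACP/marker frames are handed to the
 * mode 4 state machine, and packets that should not be delivered (slave not
 * collecting, or destination MAC not matching while not in promiscuous
 * mode) are removed from the burst.
 */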
394 static uint16_t
395 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
396                 uint16_t nb_pkts)
397 {
398         /* Cast to structure containing the bonded device's port id and queue id */
399         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
400         struct bond_dev_private *internals = bd_rx_q->dev_private;
401         struct ether_addr bond_mac;
402
403         struct ether_hdr *hdr;
404
405         const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
406         uint16_t num_rx_total = 0;      /* Total number of received packets */
407         uint16_t slaves[RTE_MAX_ETHPORTS];
408         uint16_t slave_count, idx;
409
410         uint8_t collecting;  /* current slave collecting status */
411         const uint8_t promisc = internals->promiscuous_en;
412         uint8_t i, j, k;
413         uint8_t subtype;
414
415         rte_eth_macaddr_get(internals->port_id, &bond_mac);
416         /* Copy slave list to protect against slave up/down changes during rx
417          * bursting */
418         slave_count = internals->active_slave_count;
419         memcpy(slaves, internals->active_slaves,
420                         sizeof(internals->active_slaves[0]) * slave_count);
421
422         idx = internals->active_slave;
423         if (idx >= slave_count) {
424                 internals->active_slave = 0;
425                 idx = 0;
426         }
427         for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
428                 j = num_rx_total;
429                 collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
430                                          COLLECTING);
431
432                 /* Read packets from this slave */
433                 num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
434                                 &bufs[num_rx_total], nb_pkts - num_rx_total);
435
436                 for (k = j; k < 2 && k < num_rx_total; k++)
437                         rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
438
439                 /* Handle slow protocol packets. */
440                 while (j < num_rx_total) {
441
442                         /* Packets with a known type beyond plain L2 cannot be slow protocol frames; skip them */
443                         if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
444                                 j++;
445                                 continue;
446                         }
447
448                         if (j + 3 < num_rx_total)
449                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
450
451                         hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
452                         subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
453
454                         /* Remove the packet from the array if it is a slow protocol packet,
455                          * the slave is not in collecting state, or the bonding interface is
456                          * not in promiscuous mode and the destination address does not match. */
457                         if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
458                                 !collecting || (!promisc &&
459                                         !is_multicast_ether_addr(&hdr->d_addr) &&
460                                         !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
461
462                                 if (hdr->ether_type == ether_type_slow_be) {
463                                         bond_mode_8023ad_handle_slow_pkt(
464                                             internals, slaves[idx], bufs[j]);
465                                 } else
466                                         rte_pktmbuf_free(bufs[j]);
467
468                                 /* Packet is managed by mode 4 or dropped, shift the array */
469                                 num_rx_total--;
470                                 if (j < num_rx_total) {
471                                         memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
472                                                 (num_rx_total - j));
473                                 }
474                         } else
475                                 j++;
476                 }
477                 if (unlikely(++idx == slave_count))
478                         idx = 0;
479         }
480
481         internals->active_slave = idx;
482         return num_rx_total;
483 }
484
485 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
486 uint32_t burstnumberRX;
487 uint32_t burstnumberTX;
488
489 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
490
491 static void
492 arp_op_name(uint16_t arp_op, char *buf)
493 {
494         switch (arp_op) {
495         case ARP_OP_REQUEST:
496                 snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
497                 return;
498         case ARP_OP_REPLY:
499                 snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
500                 return;
501         case ARP_OP_REVREQUEST:
502                 snprintf(buf, sizeof("Reverse ARP Request"), "%s",
503                                 "Reverse ARP Request");
504                 return;
505         case ARP_OP_REVREPLY:
506                 snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
507                                 "Reverse ARP Reply");
508                 return;
509         case ARP_OP_INVREQUEST:
510                 snprintf(buf, sizeof("Peer Identify Request"), "%s",
511                                 "Peer Identify Request");
512                 return;
513         case ARP_OP_INVREPLY:
514                 snprintf(buf, sizeof("Peer Identify Reply"), "%s",
515                                 "Peer Identify Reply");
516                 return;
517         default:
518                 break;
519         }
520         snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
521         return;
522 }
523 #endif
524 #define MaxIPv4String   16
525 static void
526 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
527 {
528         uint32_t ipv4_addr;
529
530         ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
531         snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
532                 (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
533                 ipv4_addr & 0xFF);
534 }
535
536 #define MAX_CLIENTS_NUMBER      128
537 uint8_t active_clients;
538 struct client_stats_t {
539         uint16_t port;
540         uint32_t ipv4_addr;
541         uint32_t ipv4_rx_packets;
542         uint32_t ipv4_tx_packets;
543 };
544 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
545
546 static void
547 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
548 {
549         int i = 0;
550
551         for (; i < MAX_CLIENTS_NUMBER; i++)     {
552                 if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))      {
553                         /* Existing client: update its RX or TX packet count */
554                         if (TXorRXindicator == &burstnumberRX)
555                                 client_stats[i].ipv4_rx_packets++;
556                         else
557                                 client_stats[i].ipv4_tx_packets++;
558                         return;
559                 }
560         }
561         /* We have a new client. Insert it into the table and update its stats */
562         if (TXorRXindicator == &burstnumberRX)
563                 client_stats[active_clients].ipv4_rx_packets++;
564         else
565                 client_stats[active_clients].ipv4_tx_packets++;
566         client_stats[active_clients].ipv4_addr = addr;
567         client_stats[active_clients].port = port;
568         active_clients++;
569
570 }
571
572 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
573 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)     \
574                 RTE_LOG(DEBUG, PMD, \
575                 "%s " \
576                 "port:%d " \
577                 "SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
578                 "SrcIP:%s " \
579                 "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
580                 "DstIP:%s " \
581                 "%s " \
582                 "%d\n", \
583                 info, \
584                 port, \
585                 eth_h->s_addr.addr_bytes[0], \
586                 eth_h->s_addr.addr_bytes[1], \
587                 eth_h->s_addr.addr_bytes[2], \
588                 eth_h->s_addr.addr_bytes[3], \
589                 eth_h->s_addr.addr_bytes[4], \
590                 eth_h->s_addr.addr_bytes[5], \
591                 src_ip, \
592                 eth_h->d_addr.addr_bytes[0], \
593                 eth_h->d_addr.addr_bytes[1], \
594                 eth_h->d_addr.addr_bytes[2], \
595                 eth_h->d_addr.addr_bytes[3], \
596                 eth_h->d_addr.addr_bytes[4], \
597                 eth_h->d_addr.addr_bytes[5], \
598                 dst_ip, \
599                 arp_op, \
600                 ++burstnumber)
601 #endif
602
603 static void
604 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
605                 uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
606 {
607         struct ipv4_hdr *ipv4_h;
608 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
609         struct arp_hdr *arp_h;
610         char dst_ip[16];
611         char ArpOp[24];
612         char buf[16];
613 #endif
614         char src_ip[16];
615
616         uint16_t ether_type = eth_h->ether_type;
617         uint16_t offset = get_vlan_offset(eth_h, &ether_type);
618
619 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
620         snprintf(buf, 16, "%s", info);
621 #endif
622
623         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
624                 ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
625                 ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
626 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
627                 ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
628                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
629 #endif
630                 update_client_stats(ipv4_h->src_addr, port, burstnumber);
631         }
632 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
633         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
634                 arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
635                 ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
636                 ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
637                 arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
638                 MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
639         }
640 #endif
641 }
642 #endif
643
644 static uint16_t
645 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
646 {
647         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
648         struct bond_dev_private *internals = bd_tx_q->dev_private;
649         struct ether_hdr *eth_h;
650         uint16_t ether_type, offset;
651         uint16_t nb_recv_pkts;
652         int i;
653
654         nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
655
656         for (i = 0; i < nb_recv_pkts; i++) {
657                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
658                 ether_type = eth_h->ether_type;
659                 offset = get_vlan_offset(eth_h, &ether_type);
660
661                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
662 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
663                         mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
664 #endif
665                         bond_mode_alb_arp_recv(eth_h, offset, internals);
666                 }
667 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
668                 else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
669                         mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
670 #endif
671         }
672
673         return nb_recv_pkts;
674 }
675
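/*
 * Mode 0 (round robin) TX: spread the burst across the active slaves in
 * rotating order, send one burst per slave, and move packets that failed
 * to transmit to the end of bufs.
 */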
676 static uint16_t
677 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
678                 uint16_t nb_pkts)
679 {
680         struct bond_dev_private *internals;
681         struct bond_tx_queue *bd_tx_q;
682
683         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
684         uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
685
686         uint16_t num_of_slaves;
687         uint16_t slaves[RTE_MAX_ETHPORTS];
688
689         uint16_t num_tx_total = 0, num_tx_slave;
690
691         static int slave_idx = 0;
692         int i, cslave_idx = 0, tx_fail_total = 0;
693
694         bd_tx_q = (struct bond_tx_queue *)queue;
695         internals = bd_tx_q->dev_private;
696
697         /* Copy slave list to protect against slave up/down changes during tx
698          * bursting */
699         num_of_slaves = internals->active_slave_count;
700         memcpy(slaves, internals->active_slaves,
701                         sizeof(internals->active_slaves[0]) * num_of_slaves);
702
703         if (num_of_slaves < 1)
704                 return num_tx_total;
705
706         /* Populate each slave's mbuf array with the packets to be sent on it */
707         for (i = 0; i < nb_pkts; i++) {
708                 cslave_idx = (slave_idx + i) % num_of_slaves;
709                 slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
710         }
711
712         /* increment current slave index so the next call to tx burst starts on the
713          * next slave */
714         slave_idx = ++cslave_idx;
715
716         /* Send packet burst on each slave device */
717         for (i = 0; i < num_of_slaves; i++) {
718                 if (slave_nb_pkts[i] > 0) {
719                         num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
720                                         slave_bufs[i], slave_nb_pkts[i]);
721
722                         /* if tx burst fails move packets to end of bufs */
723                         if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
724                                 int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
725
726                                 tx_fail_total += tx_fail_slave;
727
728                                 memcpy(&bufs[nb_pkts - tx_fail_total],
729                                                 &slave_bufs[i][num_tx_slave],
730                                                 tx_fail_slave * sizeof(bufs[0]));
731                         }
732                         num_tx_total += num_tx_slave;
733                 }
734         }
735
736         return num_tx_total;
737 }
738
739 static uint16_t
740 bond_ethdev_tx_burst_active_backup(void *queue,
741                 struct rte_mbuf **bufs, uint16_t nb_pkts)
742 {
743         struct bond_dev_private *internals;
744         struct bond_tx_queue *bd_tx_q;
745
746         bd_tx_q = (struct bond_tx_queue *)queue;
747         internals = bd_tx_q->dev_private;
748
749         if (internals->active_slave_count < 1)
750                 return 0;
751
752         return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
753                         bufs, nb_pkts);
754 }
755
756 static inline uint16_t
757 ether_hash(struct ether_hdr *eth_hdr)
758 {
759         unaligned_uint16_t *word_src_addr =
760                 (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
761         unaligned_uint16_t *word_dst_addr =
762                 (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
763
764         return (word_src_addr[0] ^ word_dst_addr[0]) ^
765                         (word_src_addr[1] ^ word_dst_addr[1]) ^
766                         (word_src_addr[2] ^ word_dst_addr[2]);
767 }
768
769 static inline uint32_t
770 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
771 {
772         return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
773 }
774
775 static inline uint32_t
776 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
777 {
778         unaligned_uint32_t *word_src_addr =
779                 (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
780         unaligned_uint32_t *word_dst_addr =
781                 (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
782
783         return (word_src_addr[0] ^ word_dst_addr[0]) ^
784                         (word_src_addr[1] ^ word_dst_addr[1]) ^
785                         (word_src_addr[2] ^ word_dst_addr[2]) ^
786                         (word_src_addr[3] ^ word_dst_addr[3]);
787 }
788
789
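/*
 * burst_xmit_*_hash: map every mbuf in the burst to a slave index
 * (0..slave_count-1) by hashing L2, L2+L3 or L3+L4 header fields,
 * implementing the transmit policies used by the balance and 802.3AD modes.
 */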
790 void
791 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
792                 uint8_t slave_count, uint16_t *slaves)
793 {
794         struct ether_hdr *eth_hdr;
795         uint32_t hash;
796         int i;
797
798         for (i = 0; i < nb_pkts; i++) {
799                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
800
801                 hash = ether_hash(eth_hdr);
802
803                 slaves[i] = (hash ^= hash >> 8) % slave_count;
804         }
805 }
806
807 void
808 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
809                 uint8_t slave_count, uint16_t *slaves)
810 {
811         uint16_t i;
812         struct ether_hdr *eth_hdr;
813         uint16_t proto;
814         size_t vlan_offset;
815         uint32_t hash, l3hash;
816
817         for (i = 0; i < nb_pkts; i++) {
818                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
819                 l3hash = 0;
820
821                 proto = eth_hdr->ether_type;
822                 hash = ether_hash(eth_hdr);
823
824                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
825
826                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
827                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
828                                         ((char *)(eth_hdr + 1) + vlan_offset);
829                         l3hash = ipv4_hash(ipv4_hdr);
830
831                 } else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
832                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
833                                         ((char *)(eth_hdr + 1) + vlan_offset);
834                         l3hash = ipv6_hash(ipv6_hdr);
835                 }
836
837                 hash = hash ^ l3hash;
838                 hash ^= hash >> 16;
839                 hash ^= hash >> 8;
840
841                 slaves[i] = hash % slave_count;
842         }
843 }
844
845 void
846 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
847                 uint8_t slave_count, uint16_t *slaves)
848 {
849         struct ether_hdr *eth_hdr;
850         uint16_t proto;
851         size_t vlan_offset;
852         int i;
853
854         struct udp_hdr *udp_hdr;
855         struct tcp_hdr *tcp_hdr;
856         uint32_t hash, l3hash, l4hash;
857
858         for (i = 0; i < nb_pkts; i++) {
859                 eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
860                 proto = eth_hdr->ether_type;
861                 vlan_offset = get_vlan_offset(eth_hdr, &proto);
862                 l3hash = 0;
863                 l4hash = 0;
864
865                 if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
866                         struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
867                                         ((char *)(eth_hdr + 1) + vlan_offset);
868                         size_t ip_hdr_offset;
869
870                         l3hash = ipv4_hash(ipv4_hdr);
871
872                         /* there is no L4 header in fragmented packet */
873                         if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
874                                                                 == 0)) {
875                                 ip_hdr_offset = (ipv4_hdr->version_ihl
876                                         & IPV4_HDR_IHL_MASK) *
877                                         IPV4_IHL_MULTIPLIER;
878
879                                 if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
880                                         tcp_hdr = (struct tcp_hdr *)
881                                                 ((char *)ipv4_hdr +
882                                                         ip_hdr_offset);
883                                         l4hash = HASH_L4_PORTS(tcp_hdr);
884                                 } else if (ipv4_hdr->next_proto_id ==
885                                                                 IPPROTO_UDP) {
886                                         udp_hdr = (struct udp_hdr *)
887                                                 ((char *)ipv4_hdr +
888                                                         ip_hdr_offset);
889                                         l4hash = HASH_L4_PORTS(udp_hdr);
890                                 }
891                         }
892                 } else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
893                         struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
894                                         ((char *)(eth_hdr + 1) + vlan_offset);
895                         l3hash = ipv6_hash(ipv6_hdr);
896
897                         if (ipv6_hdr->proto == IPPROTO_TCP) {
898                                 tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
899                                 l4hash = HASH_L4_PORTS(tcp_hdr);
900                         } else if (ipv6_hdr->proto == IPPROTO_UDP) {
901                                 udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
902                                 l4hash = HASH_L4_PORTS(udp_hdr);
903                         }
904                 }
905
906                 hash = l3hash ^ l4hash;
907                 hash ^= hash >> 16;
908                 hash ^= hash >> 8;
909
910                 slaves[i] = hash % slave_count;
911         }
912 }
913
914 struct bwg_slave {
915         uint64_t bwg_left_int;
916         uint64_t bwg_left_remainder;
917         uint8_t slave;
918 };
919
920 void
921 bond_tlb_activate_slave(struct bond_dev_private *internals) {
922         int i;
923
924         for (i = 0; i < internals->active_slave_count; i++) {
925                 tlb_last_obytets[internals->active_slaves[i]] = 0;
926         }
927 }
928
929 static int
930 bandwidth_cmp(const void *a, const void *b)
931 {
932         const struct bwg_slave *bwg_a = a;
933         const struct bwg_slave *bwg_b = b;
934         int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
935         int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
936                         (int64_t)bwg_a->bwg_left_remainder;
937         if (diff > 0)
938                 return 1;
939         else if (diff < 0)
940                 return -1;
941         else if (diff2 > 0)
942                 return 1;
943         else if (diff2 < 0)
944                 return -1;
945         else
946                 return 0;
947 }
948
949 static void
950 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
951                 struct bwg_slave *bwg_slave)
952 {
953         struct rte_eth_link link_status;
954
955         rte_eth_link_get_nowait(port_id, &link_status);
956         uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
957         if (link_bwg == 0)
958                 return;
959         link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
960         bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
961         bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
962 }
963
964 static void
965 bond_ethdev_update_tlb_slave_cb(void *arg)
966 {
967         struct bond_dev_private *internals = arg;
968         struct rte_eth_stats slave_stats;
969         struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
970         uint8_t slave_count;
971         uint64_t tx_bytes;
972
973         uint8_t update_stats = 0;
974         uint8_t i, slave_id;
975
976         internals->slave_update_idx++;
977
978
979         if (internals->slave_update_idx >= REORDER_PERIOD_MS)
980                 update_stats = 1;
981
982         for (i = 0; i < internals->active_slave_count; i++) {
983                 slave_id = internals->active_slaves[i];
984                 rte_eth_stats_get(slave_id, &slave_stats);
985                 tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
986                 bandwidth_left(slave_id, tx_bytes,
987                                 internals->slave_update_idx, &bwg_array[i]);
988                 bwg_array[i].slave = slave_id;
989
990                 if (update_stats) {
991                         tlb_last_obytets[slave_id] = slave_stats.obytes;
992                 }
993         }
994
995         if (update_stats == 1)
996                 internals->slave_update_idx = 0;
997
998         slave_count = i;
999         qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
1000         for (i = 0; i < slave_count; i++)
1001                 internals->tlb_slaves_order[i] = bwg_array[i].slave;
1002
1003         rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
1004                         (struct bond_dev_private *)internals);
1005 }
1006
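/*
 * Mode 5 (TLB) TX: walk the slaves in the bandwidth-sorted order computed
 * by the alarm callback above, rewrite the source MAC of packets carrying
 * the primary slave's address to the current slave's address, and keep
 * transmitting on successive slaves until the whole burst is sent.
 */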
1007 static uint16_t
1008 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1009 {
1010         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1011         struct bond_dev_private *internals = bd_tx_q->dev_private;
1012
1013         struct rte_eth_dev *primary_port =
1014                         &rte_eth_devices[internals->primary_port];
1015         uint16_t num_tx_total = 0;
1016         uint16_t i, j;
1017
1018         uint16_t num_of_slaves = internals->active_slave_count;
1019         uint16_t slaves[RTE_MAX_ETHPORTS];
1020
1021         struct ether_hdr *ether_hdr;
1022         struct ether_addr primary_slave_addr;
1023         struct ether_addr active_slave_addr;
1024
1025         if (num_of_slaves < 1)
1026                 return num_tx_total;
1027
1028         memcpy(slaves, internals->tlb_slaves_order,
1029                                 sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1030
1031
1032         ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1033
1034         if (nb_pkts > 3) {
1035                 for (i = 0; i < 3; i++)
1036                         rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1037         }
1038
1039         for (i = 0; i < num_of_slaves; i++) {
1040                 rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1041                 for (j = num_tx_total; j < nb_pkts; j++) {
1042                         if (j + 3 < nb_pkts)
1043                                 rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1044
1045                         ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1046                         if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1047                                 ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1048 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1049                                         mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1050 #endif
1051                 }
1052
1053                 num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1054                                 bufs + num_tx_total, nb_pkts - num_tx_total);
1055
1056                 if (num_tx_total == nb_pkts)
1057                         break;
1058         }
1059
1060         return num_tx_total;
1061 }
1062
1063 void
1064 bond_tlb_disable(struct bond_dev_private *internals)
1065 {
1066         rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1067 }
1068
1069 void
1070 bond_tlb_enable(struct bond_dev_private *internals)
1071 {
1072         bond_ethdev_update_tlb_slave_cb(internals);
1073 }
1074
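/*
 * Mode 6 (ALB) TX: ARP packets are assigned to slaves by the ALB client
 * table and get their source MAC rewritten, generated ARP update packets
 * are sent on their designated slaves, and all other traffic falls back to
 * the TLB transmit policy.
 */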
1075 static uint16_t
1076 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1077 {
1078         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1079         struct bond_dev_private *internals = bd_tx_q->dev_private;
1080
1081         struct ether_hdr *eth_h;
1082         uint16_t ether_type, offset;
1083
1084         struct client_data *client_info;
1085
1086         /*
1087          * We create transmit buffers for every slave and one additional to send
1088          * through tlb. In the worst case every packet will be sent on one port.
1089          */
1090         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1091         uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1092
1093         /*
1094          * We create separate transmit buffers for update packets as they won't
1095          * be counted in num_tx_total.
1096          */
1097         struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1098         uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1099
1100         struct rte_mbuf *upd_pkt;
1101         size_t pkt_size;
1102
1103         uint16_t num_send, num_not_send = 0;
1104         uint16_t num_tx_total = 0;
1105         uint16_t slave_idx;
1106
1107         int i, j;
1108
1109         /* Search tx buffer for ARP packets and forward them to alb */
1110         for (i = 0; i < nb_pkts; i++) {
1111                 eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1112                 ether_type = eth_h->ether_type;
1113                 offset = get_vlan_offset(eth_h, &ether_type);
1114
1115                 if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1116                         slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1117
1118                         /* Change src mac in eth header */
1119                         rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1120
1121                         /* Add packet to slave tx buffer */
1122                         slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1123                         slave_bufs_pkts[slave_idx]++;
1124                 } else {
1125                         /* If packet is not ARP, send it with TLB policy */
1126                         slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1127                                         bufs[i];
1128                         slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1129                 }
1130         }
1131
1132         /* Update connected client ARP tables */
1133         if (internals->mode6.ntt) {
1134                 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1135                         client_info = &internals->mode6.client_table[i];
1136
1137                         if (client_info->in_use) {
1138                                 /* Allocate new packet to send ARP update on current slave */
1139                                 upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1140                                 if (upd_pkt == NULL) {
1141                                         RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1142                                         continue;
1143                                 }
1144                                 pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1145                                                 + client_info->vlan_count * sizeof(struct vlan_hdr);
1146                                 upd_pkt->data_len = pkt_size;
1147                                 upd_pkt->pkt_len = pkt_size;
1148
1149                                 slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1150                                                 internals);
1151
1152                                 /* Add packet to update tx buffer */
1153                                 update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1154                                 update_bufs_pkts[slave_idx]++;
1155                         }
1156                 }
1157                 internals->mode6.ntt = 0;
1158         }
1159
1160         /* Send ARP packets on proper slaves */
1161         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1162                 if (slave_bufs_pkts[i] > 0) {
1163                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1164                                         slave_bufs[i], slave_bufs_pkts[i]);
1165                         for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1166                                 bufs[nb_pkts - 1 - num_not_send - j] =
1167                                                 slave_bufs[i][nb_pkts - 1 - j];
1168                         }
1169
1170                         num_tx_total += num_send;
1171                         num_not_send += slave_bufs_pkts[i] - num_send;
1172
1173 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1174         /* Print TX stats including update packets */
1175                         for (j = 0; j < slave_bufs_pkts[i]; j++) {
1176                                 eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1177                                 mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1178                         }
1179 #endif
1180                 }
1181         }
1182
1183         /* Send update packets on proper slaves */
1184         for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1185                 if (update_bufs_pkts[i] > 0) {
1186                         num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1187                                         update_bufs_pkts[i]);
1188                         for (j = num_send; j < update_bufs_pkts[i]; j++) {
1189                                 rte_pktmbuf_free(update_bufs[i][j]);
1190                         }
1191 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1192                         for (j = 0; j < update_bufs_pkts[i]; j++) {
1193                                 eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1194                                 mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1195                         }
1196 #endif
1197                 }
1198         }
1199
1200         /* Send non-ARP packets using tlb policy */
1201         if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1202                 num_send = bond_ethdev_tx_burst_tlb(queue,
1203                                 slave_bufs[RTE_MAX_ETHPORTS],
1204                                 slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1205
1206                 for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1207                         bufs[nb_pkts - 1 - num_not_send - j] =
1208                                         slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1209                 }
1210
1211                 num_tx_total += num_send;
1212         }
1213
1214         return num_tx_total;
1215 }
1216
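/*
 * Mode 2 (balance) TX: hash each packet to an active slave according to the
 * configured xmit policy, transmit the per-slave bursts, and move packets
 * that failed to send to the end of bufs for the caller to retry.
 */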
1217 static uint16_t
1218 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1219                 uint16_t nb_bufs)
1220 {
1221         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1222         struct bond_dev_private *internals = bd_tx_q->dev_private;
1223
1224         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1225         uint16_t slave_count;
1226
1227         /* Array to sort mbufs for transmission on each slave into */
1228         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1229         /* Number of mbufs for transmission on each slave */
1230         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1231         /* Mapping array generated by hash function to map mbufs to slaves */
1232         uint16_t bufs_slave_port_idxs[nb_bufs];
1233
1234         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1235         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1236
1237         uint16_t i, j;
1238
1239         if (unlikely(nb_bufs == 0))
1240                 return 0;
1241
1242         /* Copy slave list to protect against slave up/down changes during tx
1243          * bursting */
1244         slave_count = internals->active_slave_count;
1245         if (unlikely(slave_count < 1))
1246                 return 0;
1247
1248         memcpy(slave_port_ids, internals->active_slaves,
1249                         sizeof(slave_port_ids[0]) * slave_count);
1250
1251         /*
1252          * Populate each slave's mbuf array with the packets to be sent on it,
1253          * selecting the output slave using a hash based on the xmit policy
1254          */
1255         internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1256                         bufs_slave_port_idxs);
1257
1258         for (i = 0; i < nb_bufs; i++) {
1259                 /* Populate slave mbuf arrays with mbufs for that slave. */
1260                 uint8_t slave_idx = bufs_slave_port_idxs[i];
1261
1262                 slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1263         }
1264
1265         /* Send packet burst on each slave device */
1266         for (i = 0; i < slave_count; i++) {
1267                 if (slave_nb_bufs[i] == 0)
1268                         continue;
1269
1270                 slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1271                                 bd_tx_q->queue_id, slave_bufs[i],
1272                                 slave_nb_bufs[i]);
1273
1274                 total_tx_count += slave_tx_count;
1275
1276                 /* If tx burst fails move packets to end of bufs */
1277                 if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1278                         slave_tx_fail_count[i] = slave_nb_bufs[i] -
1279                                         slave_tx_count;
1280                         total_tx_fail_count += slave_tx_fail_count[i];
1281
1282                         /*
1283                          * Shift bufs to beginning of array to allow reordering
1284                          * later
1285                          */
1286                         for (j = 0; j < slave_tx_fail_count[i]; j++) {
1287                                 slave_bufs[i][j] =
1288                                         slave_bufs[i][slave_tx_count + j];
1289                         }
1290                 }
1291         }
1292
1293         /*
1294          * If there are tx burst failures we move packets to end of bufs to
1295          * preserve the expected PMD behaviour that all packets which failed
1296          * transmission are at the end of the input mbuf array
1297          */
1298         if (unlikely(total_tx_fail_count > 0)) {
1299                 int bufs_idx = nb_bufs - total_tx_fail_count;
1300
1301                 for (i = 0; i < slave_count; i++) {
1302                         if (slave_tx_fail_count[i] > 0) {
1303                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1304                                         bufs[bufs_idx++] = slave_bufs[i][j];
1305                         }
1306                 }
1307         }
1308
1309         return total_tx_count;
1310 }
1311
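/*
 * Mode 4 (802.3AD) TX without dedicated queues: distribute data packets
 * across the slaves in DISTRIBUTING state using the xmit policy hash, then
 * send any pending LACP control frame queued on each slave's tx_ring.
 */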
1312 static uint16_t
1313 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1314                 uint16_t nb_bufs)
1315 {
1316         struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1317         struct bond_dev_private *internals = bd_tx_q->dev_private;
1318
1319         uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1320         uint16_t slave_count;
1321
1322         uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1323         uint16_t dist_slave_count;
1324
1325         /* 2-D array to sort mbufs for transmission on each slave into */
1326         struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1327         /* Number of mbufs for transmission on each slave */
1328         uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1329         /* Mapping array generated by hash function to map mbufs to slaves */
1330         uint16_t bufs_slave_port_idxs[nb_bufs];
1331
1332         uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1333         uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1334
1335         uint16_t i, j;
1336
1337         if (unlikely(nb_bufs == 0))
1338                 return 0;
1339
1340         /* Copy slave list to protect against slave up/down changes during tx
1341          * bursting */
1342         slave_count = internals->active_slave_count;
1343         if (unlikely(slave_count < 1))
1344                 return 0;
1345
1346         memcpy(slave_port_ids, internals->active_slaves,
1347                         sizeof(slave_port_ids[0]) * slave_count);
1348
1349         dist_slave_count = 0;
1350         for (i = 0; i < slave_count; i++) {
1351                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1352
1353                 if (ACTOR_STATE(port, DISTRIBUTING))
1354                         dist_slave_port_ids[dist_slave_count++] =
1355                                         slave_port_ids[i];
1356         }
1357
1358         if (likely(dist_slave_count > 1)) {
1359
1360                 /*
1361                  * Populate each slave's mbuf array with the packets to be sent
1362                  * on it, selecting the output slave using a hash based on the
1363                  * xmit policy
1363                  */
1364                 internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1365                                 bufs_slave_port_idxs);
1366
1367                 for (i = 0; i < nb_bufs; i++) {
1368                         /*
1369                          * Populate slave mbuf arrays with mbufs for that
1370                          * slave
1371                          */
1372                         uint8_t slave_idx = bufs_slave_port_idxs[i];
1373
1374                         slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1375                                         bufs[i];
1376                 }
1377
1378
1379                 /* Send packet burst on each slave device */
1380                 for (i = 0; i < dist_slave_count; i++) {
1381                         if (slave_nb_bufs[i] == 0)
1382                                 continue;
1383
1384                         slave_tx_count = rte_eth_tx_burst(
1385                                         dist_slave_port_ids[i],
1386                                         bd_tx_q->queue_id, slave_bufs[i],
1387                                         slave_nb_bufs[i]);
1388
1389                         total_tx_count += slave_tx_count;
1390
1391                         /* If tx burst fails move packets to end of bufs */
1392                         if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1393                                 slave_tx_fail_count[i] = slave_nb_bufs[i] -
1394                                                 slave_tx_count;
1395                                 total_tx_fail_count += slave_tx_fail_count[i];
1396
1397                                 /*
1398                                  * Shift bufs to beginning of array to allow
1399                                  * reordering later
1400                                  */
1401                                 for (j = 0; j < slave_tx_fail_count[i]; j++)
1402                                         slave_bufs[i][j] =
1403                                                 slave_bufs[i]
1404                                                         [slave_tx_count
1405                                                         + j];
1406                         }
1407                 }
1408
1409                 /*
1410                  * If there are tx burst failures we move packets to end of
1411                  * bufs to preserve expected PMD behaviour of all failed
1412                  * transmitted packets being at the end of the input mbuf array
1413                  */
1414                 if (unlikely(total_tx_fail_count > 0)) {
1415                         int bufs_idx = nb_bufs - total_tx_fail_count;
1416
1417                         for (i = 0; i < slave_count; i++) {
1418                                 if (slave_tx_fail_count[i] > 0) {
1419                                         for (j = 0;
1420                                                 j < slave_tx_fail_count[i];
1421                                                 j++) {
1422                                                 bufs[bufs_idx++] =
1423                                                         slave_bufs[i][j];
1424                                         }
1425                                 }
1426                         }
1427                 }
1428         }
1429
1430         /* Check for LACP control packets and send if available */
1431         for (i = 0; i < slave_count; i++) {
1432                 struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1433                 struct rte_mbuf *ctrl_pkt = NULL;
1434
1435                 if (likely(rte_ring_empty(port->tx_ring)))
1436                         continue;
1437
1438                 if (rte_ring_dequeue(port->tx_ring,
1439                                      (void **)&ctrl_pkt) != -ENOENT) {
1440                         slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1441                                         bd_tx_q->queue_id, &ctrl_pkt, 1);
1442                         /*
1443                          * re-enqueue LAG control plane packets to buffering
1444                          * ring if transmission fails so the packet isn't lost.
1445                          */
1446                         if (slave_tx_count != 1)
1447                                 rte_ring_enqueue(port->tx_ring, ctrl_pkt);
1448                 }
1449         }
1450
1451         return total_tx_count;
1452 }
1453
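     /*
      * Broadcast (mode 3) transmit path: every mbuf is sent on all active
      * slaves, which requires bumping each mbuf's reference count by
      * (num_of_slaves - 1).  Copies that a slave fails to send are freed,
      * except on the most successful slave, whose unsent mbufs remain
      * owned by the caller; that slave's count is what gets returned.
      */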
1454 static uint16_t
1455 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1456                 uint16_t nb_pkts)
1457 {
1458         struct bond_dev_private *internals;
1459         struct bond_tx_queue *bd_tx_q;
1460
1461         uint8_t tx_failed_flag = 0, num_of_slaves;
1462         uint16_t slaves[RTE_MAX_ETHPORTS];
1463
1464         uint16_t max_nb_of_tx_pkts = 0;
1465
1466         int slave_tx_total[RTE_MAX_ETHPORTS];
1467         int i, most_successful_tx_slave = -1;
1468
1469         bd_tx_q = (struct bond_tx_queue *)queue;
1470         internals = bd_tx_q->dev_private;
1471
1472         /* Copy slave list to protect against slave up/down changes during tx
1473          * bursting */
1474         num_of_slaves = internals->active_slave_count;
1475         memcpy(slaves, internals->active_slaves,
1476                         sizeof(internals->active_slaves[0]) * num_of_slaves);
1477
1478         if (num_of_slaves < 1)
1479                 return 0;
1480
1481         /* Increment reference count on mbufs */
1482         for (i = 0; i < nb_pkts; i++)
1483                 rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1484
1485         /* Transmit burst on each active slave */
1486         for (i = 0; i < num_of_slaves; i++) {
1487                 slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1488                                         bufs, nb_pkts);
1489
1490                 if (unlikely(slave_tx_total[i] < nb_pkts))
1491                         tx_failed_flag = 1;
1492
1493                 /* record the value and slave index for the slave which transmits the
1494                  * maximum number of packets */
1495                 if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1496                         max_nb_of_tx_pkts = slave_tx_total[i];
1497                         most_successful_tx_slave = i;
1498                 }
1499         }
1500
1501         /* if slaves fail to transmit packets from burst, the calling application
1502          * is not expected to know about multiple references to packets so we must
1503          * handle failures of all packets except those of the most successful slave
1504          */
1505         if (unlikely(tx_failed_flag))
1506                 for (i = 0; i < num_of_slaves; i++)
1507                         if (i != most_successful_tx_slave)
1508                                 while (slave_tx_total[i] < nb_pkts)
1509                                         rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1510
1511         return max_nb_of_tx_pkts;
1512 }
1513
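     /*
      * Record the link properties the bonded device should advertise.  In
      * mode 4 the speed/duplex/autoneg of the first slave become the
      * reference that link_properties_valid() checks subsequent slaves
      * against; in every other mode the defaults are used.
      */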
1514 void
1515 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1516 {
1517         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1518
1519         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1520                 /**
1521                  * If in mode 4 then save the link properties of the first
1522                  * slave; all subsequent slaves must match these properties
1523                  */
1524                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1525
1526                 bond_link->link_autoneg = slave_link->link_autoneg;
1527                 bond_link->link_duplex = slave_link->link_duplex;
1528                 bond_link->link_speed = slave_link->link_speed;
1529         } else {
1530                 /**
1531                  * In any other mode the link properties are set to default
1532                  * values of AUTONEG/DUPLEX
1533                  */
1534                 ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1535                 ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1536         }
1537 }
1538
1539 int
1540 link_properties_valid(struct rte_eth_dev *ethdev,
1541                 struct rte_eth_link *slave_link)
1542 {
1543         struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1544
1545         if (bond_ctx->mode == BONDING_MODE_8023AD) {
1546                 struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1547
1548                 if (bond_link->link_duplex != slave_link->link_duplex ||
1549                         bond_link->link_autoneg != slave_link->link_autoneg ||
1550                         bond_link->link_speed != slave_link->link_speed)
1551                         return -1;
1552         }
1553
1554         return 0;
1555 }
1556
1557 int
1558 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1559 {
1560         struct ether_addr *mac_addr;
1561
1562         if (eth_dev == NULL) {
1563                 RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1564                 return -1;
1565         }
1566
1567         if (dst_mac_addr == NULL) {
1568                 RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1569                 return -1;
1570         }
1571
1572         mac_addr = eth_dev->data->mac_addrs;
1573
1574         ether_addr_copy(mac_addr, dst_mac_addr);
1575         return 0;
1576 }
1577
1578 int
1579 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1580 {
1581         struct ether_addr *mac_addr;
1582
1583         if (eth_dev == NULL) {
1584                 RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1585                 return -1;
1586         }
1587
1588         if (new_mac_addr == NULL) {
1589                 RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1590                 return -1;
1591         }
1592
1593         mac_addr = eth_dev->data->mac_addrs;
1594
1595         /* If new MAC is different to current MAC then update */
1596         if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1597                 memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1598
1599         return 0;
1600 }
1601
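     /*
      * Propagate MAC addresses to the slaves according to the bonding
      * mode: round-robin/balance/broadcast give every slave the bond MAC,
      * mode 4 delegates to the 802.3ad state machine, and the remaining
      * modes keep the bond MAC only on the current primary while the
      * other slaves retain their persisted addresses.
      */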
1602 int
1603 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1604 {
1605         struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1606         int i;
1607
1608         /* Update slave devices MAC addresses */
1609         if (internals->slave_count < 1)
1610                 return -1;
1611
1612         switch (internals->mode) {
1613         case BONDING_MODE_ROUND_ROBIN:
1614         case BONDING_MODE_BALANCE:
1615         case BONDING_MODE_BROADCAST:
1616                 for (i = 0; i < internals->slave_count; i++) {
1617                         if (rte_eth_dev_default_mac_addr_set(
1618                                         internals->slaves[i].port_id,
1619                                         bonded_eth_dev->data->mac_addrs)) {
1620                                 RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1621                                                 internals->slaves[i].port_id);
1622                                 return -1;
1623                         }
1624                 }
1625                 break;
1626         case BONDING_MODE_8023AD:
1627                 bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1628                 break;
1629         case BONDING_MODE_ACTIVE_BACKUP:
1630         case BONDING_MODE_TLB:
1631         case BONDING_MODE_ALB:
1632         default:
1633                 for (i = 0; i < internals->slave_count; i++) {
1634                         if (internals->slaves[i].port_id ==
1635                                         internals->current_primary_port) {
1636                                 if (rte_eth_dev_default_mac_addr_set(
1637                                                 internals->current_primary_port,
1638                                                 bonded_eth_dev->data->mac_addrs)) {
1639                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1640                                                         internals->current_primary_port);
1641                                         return -1;
1642                                 }
1643                         } else {
1644                                 if (rte_eth_dev_default_mac_addr_set(
1645                                                 internals->slaves[i].port_id,
1646                                                 &internals->slaves[i].persisted_mac_addr)) {
1647                                         RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1648                                                         internals->slaves[i].port_id);
1649                                         return -1;
1650                                 }
1651                         }
1652                 }
1653         }
1654
1655         return 0;
1656 }
1657
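     /*
      * Install the rx/tx burst handlers that implement the requested
      * bonding mode.  Applications normally select the mode through the
      * bonding API or the net_bonding vdev arguments rather than calling
      * this directly; a minimal sketch (illustrative only):
      *
      *     port_id = rte_eth_bond_create("net_bonding0",
      *                     BONDING_MODE_8023AD, rte_socket_id());
      *     rte_eth_bond_slave_add(port_id, slave_port_id);
      *
      * which ends up invoking bond_ethdev_mode_set() for the bonded port.
      */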
1658 int
1659 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1660 {
1661         struct bond_dev_private *internals;
1662
1663         internals = eth_dev->data->dev_private;
1664
1665         switch (mode) {
1666         case BONDING_MODE_ROUND_ROBIN:
1667                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1668                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1669                 break;
1670         case BONDING_MODE_ACTIVE_BACKUP:
1671                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1672                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1673                 break;
1674         case BONDING_MODE_BALANCE:
1675                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1676                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1677                 break;
1678         case BONDING_MODE_BROADCAST:
1679                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1680                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1681                 break;
1682         case BONDING_MODE_8023AD:
1683                 if (bond_mode_8023ad_enable(eth_dev) != 0)
1684                         return -1;
1685
1686                 if (internals->mode4.dedicated_queues.enabled == 0) {
1687                         eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1688                         eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1689                         RTE_LOG(WARNING, PMD,
1690                                 "Using mode 4, it is necessary to do TX burst "
1691                                 "and RX burst at least every 100ms.\n");
1692                 } else {
1693                         /* Use flow director's optimization */
1694                         eth_dev->rx_pkt_burst =
1695                                         bond_ethdev_rx_burst_8023ad_fast_queue;
1696                         eth_dev->tx_pkt_burst =
1697                                         bond_ethdev_tx_burst_8023ad_fast_queue;
1698                 }
1699                 break;
1700         case BONDING_MODE_TLB:
1701                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1702                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1703                 break;
1704         case BONDING_MODE_ALB:
1705                 if (bond_mode_alb_enable(eth_dev) != 0)
1706                         return -1;
1707
1708                 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1709                 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1710                 break;
1711         default:
1712                 return -1;
1713         }
1714
1715         internals->mode = mode;
1716
1717         return 0;
1718 }
1719
1720
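     /*
      * Prepare the LACP "slow" path for one slave: lazily allocate a
      * per-slave mempool for slow-protocol frames and, when dedicated
      * queues are enabled, set up the extra rx/tx queue pair used to keep
      * LACPDUs off the data-path queues.
      */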
1721 static int
1722 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1723                 struct rte_eth_dev *slave_eth_dev)
1724 {
1725         int errval = 0;
1726         struct bond_dev_private *internals = (struct bond_dev_private *)
1727                 bonded_eth_dev->data->dev_private;
1728         struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1729
1730         if (port->slow_pool == NULL) {
1731                 char mem_name[256];
1732                 int slave_id = slave_eth_dev->data->port_id;
1733
1734                 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1735                                 slave_id);
1736                 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1737                         250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1738                         slave_eth_dev->data->numa_node);
1739
1740                 /* Any memory allocation failure in initialization is critical because
1741                  * resources cannot be freed, so reinitialization is impossible. */
1742                 if (port->slow_pool == NULL) {
1743                         rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1744                                 slave_id, mem_name, rte_strerror(rte_errno));
1745                 }
1746         }
1747
1748         if (internals->mode4.dedicated_queues.enabled == 1) {
1749                 /* Configure slow Rx queue */
1750
1751                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1752                                 internals->mode4.dedicated_queues.rx_qid, 128,
1753                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1754                                 NULL, port->slow_pool);
1755                 if (errval != 0) {
1756                         RTE_BOND_LOG(ERR,
1757                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1758                                         slave_eth_dev->data->port_id,
1759                                         internals->mode4.dedicated_queues.rx_qid,
1760                                         errval);
1761                         return errval;
1762                 }
1763
1764                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1765                                 internals->mode4.dedicated_queues.tx_qid, 512,
1766                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1767                                 NULL);
1768                 if (errval != 0) {
1769                         RTE_BOND_LOG(ERR,
1770                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1771                                 slave_eth_dev->data->port_id,
1772                                 internals->mode4.dedicated_queues.tx_qid,
1773                                 errval);
1774                         return errval;
1775                 }
1776         }
1777         return 0;
1778 }
1779
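     /*
      * Re-apply the bonded device's configuration to a slave: stop it,
      * propagate LSC interrupt, RSS and VLAN filter settings, configure
      * the same number of rx/tx queues (plus the mode-4 dedicated pair
      * and its flow rule when enabled), restart the port, sync the RSS
      * RETA and report the initial link status.
      */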
1780 int
1781 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1782                 struct rte_eth_dev *slave_eth_dev)
1783 {
1784         struct bond_rx_queue *bd_rx_q;
1785         struct bond_tx_queue *bd_tx_q;
1786         uint16_t nb_rx_queues;
1787         uint16_t nb_tx_queues;
1788
1789         int errval;
1790         uint16_t q_id;
1791         struct rte_flow_error flow_error;
1792
1793         struct bond_dev_private *internals = (struct bond_dev_private *)
1794                 bonded_eth_dev->data->dev_private;
1795
1796         /* Stop slave */
1797         rte_eth_dev_stop(slave_eth_dev->data->port_id);
1798
1799         /* Enable interrupts on slave device if supported */
1800         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1801                 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1802
1803         /* If RSS is enabled for bonding, try to enable it for slaves  */
1804         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1805                 if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1806                                 != 0) {
1807                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1808                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1809                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1810                                         bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1811                 } else {
1812                         slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1813                 }
1814
1815                 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1816                                 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1817                 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1818                                 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1819         }
1820
1821         slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1822                         bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1823
1824         nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1825         nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1826
1827         if (internals->mode == BONDING_MODE_8023AD) {
1828                 if (internals->mode4.dedicated_queues.enabled == 1) {
1829                         nb_rx_queues++;
1830                         nb_tx_queues++;
1831                 }
1832         }
1833
1834         /* Configure device */
1835         errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1836                         nb_rx_queues, nb_tx_queues,
1837                         &(slave_eth_dev->data->dev_conf));
1838         if (errval != 0) {
1839                 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1840                                 slave_eth_dev->data->port_id, errval);
1841                 return errval;
1842         }
1843
1844         /* Setup Rx Queues */
1845         for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1846                 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1847
1848                 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1849                                 bd_rx_q->nb_rx_desc,
1850                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1851                                 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1852                 if (errval != 0) {
1853                         RTE_BOND_LOG(ERR,
1854                                         "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1855                                         slave_eth_dev->data->port_id, q_id, errval);
1856                         return errval;
1857                 }
1858         }
1859
1860         /* Setup Tx Queues */
1861         for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1862                 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1863
1864                 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1865                                 bd_tx_q->nb_tx_desc,
1866                                 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1867                                 &bd_tx_q->tx_conf);
1868                 if (errval != 0) {
1869                         RTE_BOND_LOG(ERR,
1870                                 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1871                                 slave_eth_dev->data->port_id, q_id, errval);
1872                         return errval;
1873                 }
1874         }
1875
1876         if (internals->mode == BONDING_MODE_8023AD &&
1877                         internals->mode4.dedicated_queues.enabled == 1) {
1878                 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1879                 if (errval != 0)
1880                         return errval;
1881
1882                 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1883                                 slave_eth_dev->data->port_id) != 0) {
1884                         RTE_BOND_LOG(ERR,
1885                                 "bond_ethdev_8023ad_flow_verify: port=%d",
1886                                 slave_eth_dev->data->port_id);
1887                         return -1;
1888                 }
1889
1890                 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1891                         rte_flow_destroy(slave_eth_dev->data->port_id,
1892                                         internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1893                                         &flow_error);
1894
1895                 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1896                                 slave_eth_dev->data->port_id);
1897         }
1898
1899         /* Start device */
1900         errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1901         if (errval != 0) {
1902                 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1903                                 slave_eth_dev->data->port_id, errval);
1904                 return -1;
1905         }
1906
1907         /* If RSS is enabled for bonding, synchronize RETA */
1908         if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1909                 int i;
1910                 struct bond_dev_private *internals;
1911
1912                 internals = bonded_eth_dev->data->dev_private;
1913
1914                 for (i = 0; i < internals->slave_count; i++) {
1915                         if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1916                                 errval = rte_eth_dev_rss_reta_update(
1917                                                 slave_eth_dev->data->port_id,
1918                                                 &internals->reta_conf[0],
1919                                                 internals->slaves[i].reta_size);
1920                                 if (errval != 0) {
1921                                         RTE_LOG(WARNING, PMD,
1922                                                         "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1923                                                         " RSS Configuration for bonding may be inconsistent.\n",
1924                                                         slave_eth_dev->data->port_id, errval);
1925                                 }
1926                                 break;
1927                         }
1928                 }
1929         }
1930
1931         /* If lsc interrupt is set, check initial slave's link status */
1932         if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1933                 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1934                 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1935                         RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1936                         NULL);
1937         }
1938
1939         return 0;
1940 }
1941
1942 void
1943 slave_remove(struct bond_dev_private *internals,
1944                 struct rte_eth_dev *slave_eth_dev)
1945 {
1946         uint8_t i;
1947
1948         for (i = 0; i < internals->slave_count; i++)
1949                 if (internals->slaves[i].port_id ==
1950                                 slave_eth_dev->data->port_id)
1951                         break;
1952
1953         if (i < (internals->slave_count - 1))
1954                 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1955                                 sizeof(internals->slaves[0]) *
1956                                 (internals->slave_count - i - 1));
1957
1958         internals->slave_count--;
1959
1960         /* force reconfiguration of slave interfaces */
1961         _rte_eth_dev_reset(slave_eth_dev);
1962 }
1963
1964 static void
1965 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1966
1967 void
1968 slave_add(struct bond_dev_private *internals,
1969                 struct rte_eth_dev *slave_eth_dev)
1970 {
1971         struct bond_slave_details *slave_details =
1972                         &internals->slaves[internals->slave_count];
1973
1974         slave_details->port_id = slave_eth_dev->data->port_id;
1975         slave_details->last_link_status = 0;
1976
1977         /* Mark slave devices that don't support interrupts so we can
1978          * compensate when we start the bond
1979          */
1980         if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1981                 slave_details->link_status_poll_enabled = 1;
1982         }
1983
1984         slave_details->link_status_wait_to_complete = 0;
1985         /* persist slave MAC address so it can be restored if the slave is removed */
1986         memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1987                         sizeof(struct ether_addr));
1988 }
1989
1990 void
1991 bond_ethdev_primary_set(struct bond_dev_private *internals,
1992                 uint16_t slave_port_id)
1993 {
1994         int i;
1995
1996         if (internals->active_slave_count < 1)
1997                 internals->current_primary_port = slave_port_id;
1998         else
1999                 /* Search bonded device slave ports for new proposed primary port */
2000                 for (i = 0; i < internals->active_slave_count; i++) {
2001                         if (internals->active_slaves[i] == slave_port_id)
2002                                 internals->current_primary_port = slave_port_id;
2003                 }
2004 }
2005
2006 static void
2007 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2008
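     /*
      * dev_start for the bonded device: derive the bond MAC from the
      * primary slave unless the user supplied one, push MAC and
      * promiscuous settings to the slaves, reconfigure and restart every
      * slave, arm link-status polling for slaves without LSC interrupts
      * and start the mode-specific machinery (802.3ad state machines,
      * TLB/ALB callbacks).
      */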
2009 static int
2010 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2011 {
2012         struct bond_dev_private *internals;
2013         int i;
2014
2015         /* slave eth dev will be started by bonded device */
2016         if (check_for_bonded_ethdev(eth_dev)) {
2017                 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2018                                 eth_dev->data->port_id);
2019                 return -1;
2020         }
2021
2022         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2023         eth_dev->data->dev_started = 1;
2024
2025         internals = eth_dev->data->dev_private;
2026
2027         if (internals->slave_count == 0) {
2028                 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2029                 return -1;
2030         }
2031
2032         if (internals->user_defined_mac == 0) {
2033                 struct ether_addr *new_mac_addr = NULL;
2034
2035                 for (i = 0; i < internals->slave_count; i++)
2036                         if (internals->slaves[i].port_id == internals->primary_port)
2037                                 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2038
2039                 if (new_mac_addr == NULL)
2040                         return -1;
2041
2042                 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2043                         RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2044                                         eth_dev->data->port_id);
2045                         return -1;
2046                 }
2047         }
2048
2049         /* Update all slave devices' MAC addresses */
2050         if (mac_address_slaves_update(eth_dev) != 0)
2051                 return -1;
2052
2053         /* If bonded device is configured in promiscuous mode then re-apply config */
2054         if (internals->promiscuous_en)
2055                 bond_ethdev_promiscuous_enable(eth_dev);
2056
2057         if (internals->mode == BONDING_MODE_8023AD) {
2058                 if (internals->mode4.dedicated_queues.enabled == 1) {
2059                         internals->mode4.dedicated_queues.rx_qid =
2060                                         eth_dev->data->nb_rx_queues;
2061                         internals->mode4.dedicated_queues.tx_qid =
2062                                         eth_dev->data->nb_tx_queues;
2063                 }
2064         }
2065
2066
2067         /* Reconfigure each slave device if starting bonded device */
2068         for (i = 0; i < internals->slave_count; i++) {
2069                 struct rte_eth_dev *slave_ethdev =
2070                                 &(rte_eth_devices[internals->slaves[i].port_id]);
2071                 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2072                         RTE_BOND_LOG(ERR,
2073                                 "bonded port (%d) failed to reconfigure slave device (%d)",
2074                                 eth_dev->data->port_id,
2075                                 internals->slaves[i].port_id);
2076                         return -1;
2077                 }
2078                 /* We will need to poll for link status if any slave doesn't
2079                  * support interrupts
2080                  */
2081                 if (internals->slaves[i].link_status_poll_enabled)
2082                         internals->link_status_polling_enabled = 1;
2083         }
2084         /* start polling if needed */
2085         if (internals->link_status_polling_enabled) {
2086                 rte_eal_alarm_set(
2087                         internals->link_status_polling_interval_ms * 1000,
2088                         bond_ethdev_slave_link_status_change_monitor,
2089                         (void *)&rte_eth_devices[internals->port_id]);
2090         }
2091
2092         if (internals->user_defined_primary_port)
2093                 bond_ethdev_primary_set(internals, internals->primary_port);
2094
2095         if (internals->mode == BONDING_MODE_8023AD)
2096                 bond_mode_8023ad_start(eth_dev);
2097
2098         if (internals->mode == BONDING_MODE_TLB ||
2099                         internals->mode == BONDING_MODE_ALB)
2100                 bond_tlb_enable(internals);
2101
2102         return 0;
2103 }
2104
2105 static void
2106 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2107 {
2108         uint8_t i;
2109
2110         if (dev->data->rx_queues != NULL) {
2111                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2112                         rte_free(dev->data->rx_queues[i]);
2113                         dev->data->rx_queues[i] = NULL;
2114                 }
2115                 dev->data->nb_rx_queues = 0;
2116         }
2117
2118         if (dev->data->tx_queues != NULL) {
2119                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2120                         rte_free(dev->data->tx_queues[i]);
2121                         dev->data->tx_queues[i] = NULL;
2122                 }
2123                 dev->data->nb_tx_queues = 0;
2124         }
2125 }
2126
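     /*
      * dev_stop for the bonded device: stop the mode-specific machinery
      * (draining the mode-4 control rings, disabling TLB/ALB), clear the
      * active-slave bookkeeping and mark the bond link down.
      */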
2127 void
2128 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2129 {
2130         struct bond_dev_private *internals = eth_dev->data->dev_private;
2131         uint8_t i;
2132
2133         if (internals->mode == BONDING_MODE_8023AD) {
2134                 struct port *port;
2135                 void *pkt = NULL;
2136
2137                 bond_mode_8023ad_stop(eth_dev);
2138
2139                 /* Discard all messages to/from mode 4 state machines */
2140                 for (i = 0; i < internals->active_slave_count; i++) {
2141                         port = &mode_8023ad_ports[internals->active_slaves[i]];
2142
2143                         RTE_ASSERT(port->rx_ring != NULL);
2144                         while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2145                                 rte_pktmbuf_free(pkt);
2146
2147                         RTE_ASSERT(port->tx_ring != NULL);
2148                         while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2149                                 rte_pktmbuf_free(pkt);
2150                 }
2151         }
2152
2153         if (internals->mode == BONDING_MODE_TLB ||
2154                         internals->mode == BONDING_MODE_ALB) {
2155                 bond_tlb_disable(internals);
2156                 for (i = 0; i < internals->active_slave_count; i++)
2157                         tlb_last_obytets[internals->active_slaves[i]] = 0;
2158         }
2159
2160         internals->active_slave_count = 0;
2161         internals->link_status_polling_enabled = 0;
2162         for (i = 0; i < internals->slave_count; i++)
2163                 internals->slaves[i].last_link_status = 0;
2164
2165         eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2166         eth_dev->data->dev_started = 0;
2167 }
2168
2169 void
2170 bond_ethdev_close(struct rte_eth_dev *dev)
2171 {
2172         struct bond_dev_private *internals = dev->data->dev_private;
2173         uint8_t bond_port_id = internals->port_id;
2174         int skipped = 0;
2175
2176         RTE_LOG(INFO, PMD, "Closing bonded device %s\n", dev->device->name);
2177         while (internals->slave_count != skipped) {
2178                 uint16_t port_id = internals->slaves[skipped].port_id;
2179
2180                 rte_eth_dev_stop(port_id);
2181
2182                 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2183                         RTE_LOG(ERR, PMD,
2184                                 "Failed to remove port %d from bonded device "
2185                                 "%s\n", port_id, dev->device->name);
2186                         skipped++;
2187                 }
2188         }
2189         bond_ethdev_free_queues(dev);
2190         rte_bitmap_reset(internals->vlan_filter_bmp);
2191 }
2192
2193 /* forward declaration */
2194 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2195
2196 static void
2197 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2198 {
2199         struct bond_dev_private *internals = dev->data->dev_private;
2200
2201         uint16_t max_nb_rx_queues = UINT16_MAX;
2202         uint16_t max_nb_tx_queues = UINT16_MAX;
2203
2204         dev_info->max_mac_addrs = 1;
2205
2206         dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2207                         internals->candidate_max_rx_pktlen :
2208                         ETHER_MAX_JUMBO_FRAME_LEN;
2209
2210         /* Max number of tx/rx queues that the bonded device can support is the
2211          * minimum of the values reported by the bonded slaves, as all slaves
2212          * must be capable of supporting the same number of tx/rx queues.
2213          */
2214         if (internals->slave_count > 0) {
2215                 struct rte_eth_dev_info slave_info;
2216                 uint8_t idx;
2217
2218                 for (idx = 0; idx < internals->slave_count; idx++) {
2219                         rte_eth_dev_info_get(internals->slaves[idx].port_id,
2220                                         &slave_info);
2221
2222                         if (slave_info.max_rx_queues < max_nb_rx_queues)
2223                                 max_nb_rx_queues = slave_info.max_rx_queues;
2224
2225                         if (slave_info.max_tx_queues < max_nb_tx_queues)
2226                                 max_nb_tx_queues = slave_info.max_tx_queues;
2227                 }
2228         }
2229
2230         dev_info->max_rx_queues = max_nb_rx_queues;
2231         dev_info->max_tx_queues = max_nb_tx_queues;
2232
2233         /**
2234          * If dedicated hw queues enabled for link bonding device in LACP mode
2235          * then we need to reduce the maximum number of data path queues by 1.
2236          */
2237         if (internals->mode == BONDING_MODE_8023AD &&
2238                 internals->mode4.dedicated_queues.enabled == 1) {
2239                 dev_info->max_rx_queues--;
2240                 dev_info->max_tx_queues--;
2241         }
2242
2243         dev_info->min_rx_bufsize = 0;
2244
2245         dev_info->rx_offload_capa = internals->rx_offload_capa;
2246         dev_info->tx_offload_capa = internals->tx_offload_capa;
2247         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2248
2249         dev_info->reta_size = internals->reta_size;
2250 }
2251
2252 static int
2253 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2254 {
2255         int res;
2256         uint16_t i;
2257         struct bond_dev_private *internals = dev->data->dev_private;
2258
2259         /* don't do this while a slave is being added */
2260         rte_spinlock_lock(&internals->lock);
2261
2262         if (on)
2263                 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2264         else
2265                 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2266
2267         for (i = 0; i < internals->slave_count; i++) {
2268                 uint16_t port_id = internals->slaves[i].port_id;
2269
2270                 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2271                 if (res == -ENOTSUP)
2272                         RTE_LOG(WARNING, PMD,
2273                                 "Setting VLAN filter on slave port %u not supported.\n",
2274                                 port_id);
2275         }
2276
2277         rte_spinlock_unlock(&internals->lock);
2278         return 0;
2279 }
2280
2281 static int
2282 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2283                 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2284                 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2285 {
2286         struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2287                         rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2288                                         0, dev->data->numa_node);
2289         if (bd_rx_q == NULL)
2290                 return -1;
2291
2292         bd_rx_q->queue_id = rx_queue_id;
2293         bd_rx_q->dev_private = dev->data->dev_private;
2294
2295         bd_rx_q->nb_rx_desc = nb_rx_desc;
2296
2297         memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2298         bd_rx_q->mb_pool = mb_pool;
2299
2300         dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2301
2302         return 0;
2303 }
2304
2305 static int
2306 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2307                 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2308                 const struct rte_eth_txconf *tx_conf)
2309 {
2310         struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2311                         rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2312                                         0, dev->data->numa_node);
2313
2314         if (bd_tx_q == NULL)
2315                 return -1;
2316
2317         bd_tx_q->queue_id = tx_queue_id;
2318         bd_tx_q->dev_private = dev->data->dev_private;
2319
2320         bd_tx_q->nb_tx_desc = nb_tx_desc;
2321         memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2322
2323         dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2324
2325         return 0;
2326 }
2327
2328 static void
2329 bond_ethdev_rx_queue_release(void *queue)
2330 {
2331         if (queue == NULL)
2332                 return;
2333
2334         rte_free(queue);
2335 }
2336
2337 static void
2338 bond_ethdev_tx_queue_release(void *queue)
2339 {
2340         if (queue == NULL)
2341                 return;
2342
2343         rte_free(queue);
2344 }
2345
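     /*
      * Alarm callback that polls the link status of slaves whose PMDs do
      * not support LSC interrupts and re-arms itself for as long as such
      * a slave (or a device still being configured) remains.
      */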
2346 static void
2347 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2348 {
2349         struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2350         struct bond_dev_private *internals;
2351
2352         /* Default value for polling slave found is true as we don't want to
2353          * disable the polling thread if we cannot get the lock */
2354         int i, polling_slave_found = 1;
2355
2356         if (cb_arg == NULL)
2357                 return;
2358
2359         bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2360         internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2361
2362         if (!bonded_ethdev->data->dev_started ||
2363                 !internals->link_status_polling_enabled)
2364                 return;
2365
2366         /* If device is currently being configured then don't check slaves link
2367          * status, wait until next period */
2368         if (rte_spinlock_trylock(&internals->lock)) {
2369                 if (internals->slave_count > 0)
2370                         polling_slave_found = 0;
2371
2372                 for (i = 0; i < internals->slave_count; i++) {
2373                         if (!internals->slaves[i].link_status_poll_enabled)
2374                                 continue;
2375
2376                         slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2377                         polling_slave_found = 1;
2378
2379                         /* Update slave link status */
2380                         (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2381                                         internals->slaves[i].link_status_wait_to_complete);
2382
2383                         /* if link status has changed since last checked then call lsc
2384                          * event callback */
2385                         if (slave_ethdev->data->dev_link.link_status !=
2386                                         internals->slaves[i].last_link_status) {
2387                                 internals->slaves[i].last_link_status =
2388                                                 slave_ethdev->data->dev_link.link_status;
2389
2390                                 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2391                                                 RTE_ETH_EVENT_INTR_LSC,
2392                                                 &bonded_ethdev->data->port_id,
2393                                                 NULL);
2394                         }
2395                 }
2396                 rte_spinlock_unlock(&internals->lock);
2397         }
2398
2399         if (polling_slave_found)
2400                 /* Set alarm to continue monitoring link status of slave ethdev's */
2401                 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2402                                 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2403 }
2404
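     /*
      * Aggregate the slaves' link status into the bonded device: the
      * reported speed is the minimum active-slave speed in broadcast
      * mode, the current primary's speed in active-backup, and the sum
      * of the active slaves' speeds in the remaining modes.
      */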
2405 static int
2406 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2407 {
2408         void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2409
2410         struct bond_dev_private *bond_ctx;
2411         struct rte_eth_link slave_link;
2412
2413         uint32_t idx;
2414
2415         bond_ctx = ethdev->data->dev_private;
2416
2417         ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2418
2419         if (ethdev->data->dev_started == 0 ||
2420                         bond_ctx->active_slave_count == 0) {
2421                 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2422                 return 0;
2423         }
2424
2425         ethdev->data->dev_link.link_status = ETH_LINK_UP;
2426
2427         if (wait_to_complete)
2428                 link_update = rte_eth_link_get;
2429         else
2430                 link_update = rte_eth_link_get_nowait;
2431
2432         switch (bond_ctx->mode) {
2433         case BONDING_MODE_BROADCAST:
2434                 /**
2435                  * Setting link speed to UINT32_MAX to ensure we pick up the
2436                  * value of the first active slave
2437                  */
2438                 ethdev->data->dev_link.link_speed = UINT32_MAX;
2439
2440                 /**
2441                  * link speed is the minimum of all the active slaves' link
2442                  * speeds, as packet loss will occur on a slave if transmission
2443                  * at a rate greater than it supports is attempted
2444                  */
2445                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2446                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2447
2448                         if (slave_link.link_speed <
2449                                         ethdev->data->dev_link.link_speed)
2450                                 ethdev->data->dev_link.link_speed =
2451                                                 slave_link.link_speed;
2452                 }
2453                 break;
2454         case BONDING_MODE_ACTIVE_BACKUP:
2455                 /* Current primary slave */
2456                 link_update(bond_ctx->current_primary_port, &slave_link);
2457
2458                 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2459                 break;
2460         case BONDING_MODE_8023AD:
2461                 ethdev->data->dev_link.link_autoneg =
2462                                 bond_ctx->mode4.slave_link.link_autoneg;
2463                 ethdev->data->dev_link.link_duplex =
2464                                 bond_ctx->mode4.slave_link.link_duplex;
2465                 /* fall through to update link speed */
2466         case BONDING_MODE_ROUND_ROBIN:
2467         case BONDING_MODE_BALANCE:
2468         case BONDING_MODE_TLB:
2469         case BONDING_MODE_ALB:
2470         default:
2471                 /**
2472                  * In these modes the maximum theoretical link speed is the sum
2473                  * of all the slaves' link speeds
2474                  */
2475                 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2476
2477                 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2478                         link_update(bond_ctx->active_slaves[idx], &slave_link);
2479
2480                         ethdev->data->dev_link.link_speed +=
2481                                         slave_link.link_speed;
2482                 }
2483         }
2484
2485
2486         return 0;
2487 }
2488
2489
2490 static int
2491 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2492 {
2493         struct bond_dev_private *internals = dev->data->dev_private;
2494         struct rte_eth_stats slave_stats;
2495         int i, j;
2496
2497         for (i = 0; i < internals->slave_count; i++) {
2498                 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2499
2500                 stats->ipackets += slave_stats.ipackets;
2501                 stats->opackets += slave_stats.opackets;
2502                 stats->ibytes += slave_stats.ibytes;
2503                 stats->obytes += slave_stats.obytes;
2504                 stats->imissed += slave_stats.imissed;
2505                 stats->ierrors += slave_stats.ierrors;
2506                 stats->oerrors += slave_stats.oerrors;
2507                 stats->rx_nombuf += slave_stats.rx_nombuf;
2508
2509                 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2510                         stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2511                         stats->q_opackets[j] += slave_stats.q_opackets[j];
2512                         stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2513                         stats->q_obytes[j] += slave_stats.q_obytes[j];
2514                         stats->q_errors[j] += slave_stats.q_errors[j];
2515                 }
2516
2517         }
2518
2519         return 0;
2520 }
2521
2522 static void
2523 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2524 {
2525         struct bond_dev_private *internals = dev->data->dev_private;
2526         int i;
2527
2528         for (i = 0; i < internals->slave_count; i++)
2529                 rte_eth_stats_reset(internals->slaves[i].port_id);
2530 }
2531
2532 static void
2533 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2534 {
2535         struct bond_dev_private *internals = eth_dev->data->dev_private;
2536         int i;
2537
2538         internals->promiscuous_en = 1;
2539
2540         switch (internals->mode) {
2541         /* Promiscuous mode is propagated to all slaves */
2542         case BONDING_MODE_ROUND_ROBIN:
2543         case BONDING_MODE_BALANCE:
2544         case BONDING_MODE_BROADCAST:
2545                 for (i = 0; i < internals->slave_count; i++)
2546                         rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2547                 break;
2548         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2549         case BONDING_MODE_8023AD:
2550                 break;
2551         /* Promiscuous mode is propagated only to primary slave */
2552         case BONDING_MODE_ACTIVE_BACKUP:
2553         case BONDING_MODE_TLB:
2554         case BONDING_MODE_ALB:
2555         default:
2556                 rte_eth_promiscuous_enable(internals->current_primary_port);
2557         }
2558 }
2559
2560 static void
2561 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2562 {
2563         struct bond_dev_private *internals = dev->data->dev_private;
2564         int i;
2565
2566         internals->promiscuous_en = 0;
2567
2568         switch (internals->mode) {
2569         /* Promiscuous mode is propagated to all slaves */
2570         case BONDING_MODE_ROUND_ROBIN:
2571         case BONDING_MODE_BALANCE:
2572         case BONDING_MODE_BROADCAST:
2573                 for (i = 0; i < internals->slave_count; i++)
2574                         rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2575                 break;
2576         /* In mode 4 promiscuous mode is managed when a slave is added/removed */
2577         case BONDING_MODE_8023AD:
2578                 break;
2579         /* Promiscuous mode is propagated only to primary slave */
2580         case BONDING_MODE_ACTIVE_BACKUP:
2581         case BONDING_MODE_TLB:
2582         case BONDING_MODE_ALB:
2583         default:
2584                 rte_eth_promiscuous_disable(internals->current_primary_port);
2585         }
2586 }
2587
2588 static void
2589 bond_ethdev_delayed_lsc_propagation(void *arg)
2590 {
2591         if (arg == NULL)
2592                 return;
2593
2594         _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2595                         RTE_ETH_EVENT_INTR_LSC, NULL);
2596 }
2597
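     /*
      * LSC callback registered for every slave port: activate or
      * deactivate the slave in the bond, elect a new primary when
      * required, refresh the bonded device's link properties and
      * propagate the (optionally delayed) LSC event to the application.
      */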
2598 int
2599 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2600                 void *param, void *ret_param __rte_unused)
2601 {
2602         struct rte_eth_dev *bonded_eth_dev;
2603         struct bond_dev_private *internals;
2604         struct rte_eth_link link;
2605         int rc = -1;
2606
2607         int i, valid_slave = 0;
2608         uint8_t active_pos;
2609         uint8_t lsc_flag = 0;
2610
2611         if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2612                 return rc;
2613
2614         bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2615
2616         if (check_for_bonded_ethdev(bonded_eth_dev))
2617                 return rc;
2618
2619         internals = bonded_eth_dev->data->dev_private;
2620
2621         /* If the device isn't started don't handle interrupts */
2622         if (!bonded_eth_dev->data->dev_started)
2623                 return rc;
2624
2625         /* verify that port_id is a valid slave of bonded port */
2626         for (i = 0; i < internals->slave_count; i++) {
2627                 if (internals->slaves[i].port_id == port_id) {
2628                         valid_slave = 1;
2629                         break;
2630                 }
2631         }
2632
2633         if (!valid_slave)
2634                 return rc;
2635
2636         /* Search for port in active port list */
2637         active_pos = find_slave_by_id(internals->active_slaves,
2638                         internals->active_slave_count, port_id);
2639
2640         rte_eth_link_get_nowait(port_id, &link);
2641         if (link.link_status) {
2642                 if (active_pos < internals->active_slave_count)
2643                         return rc;
2644
2645                 /* if no active slave ports then set this port to be primary port */
2646                 if (internals->active_slave_count < 1) {
2647                         /* If first active slave, then change link status */
2648                         bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2649                         internals->current_primary_port = port_id;
2650                         lsc_flag = 1;
2651
2652                         mac_address_slaves_update(bonded_eth_dev);
2653                 }
2654
2655                 activate_slave(bonded_eth_dev, port_id);
2656
2657                 /* If user has defined the primary port then default to using it */
2658                 if (internals->user_defined_primary_port &&
2659                                 internals->primary_port == port_id)
2660                         bond_ethdev_primary_set(internals, port_id);
2661         } else {
2662                 if (active_pos == internals->active_slave_count)
2663                         return rc;
2664
2665                 /* Remove from active slave list */
2666                 deactivate_slave(bonded_eth_dev, port_id);
2667
2668                 if (internals->active_slave_count < 1)
2669                         lsc_flag = 1;
2670
2671                 /* Update primary id, take first active slave from list or if none
2672                  * available fall back to the configured primary port */
2673                 if (port_id == internals->current_primary_port) {
2674                         if (internals->active_slave_count > 0)
2675                                 bond_ethdev_primary_set(internals,
2676                                                 internals->active_slaves[0]);
2677                         else
2678                                 internals->current_primary_port = internals->primary_port;
2679                 }
2680         }
2681
2682         /**
2683          * Update bonded device link properties after any change to active
2684          * slaves
2685          */
2686         bond_ethdev_link_update(bonded_eth_dev, 0);
2687
2688         if (lsc_flag) {
2689                 /* Cancel any possible outstanding interrupts if delays are enabled */
2690                 if (internals->link_up_delay_ms > 0 ||
2691                         internals->link_down_delay_ms > 0)
2692                         rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2693                                         bonded_eth_dev);
2694
2695                 if (bonded_eth_dev->data->dev_link.link_status) {
2696                         if (internals->link_up_delay_ms > 0)
2697                                 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2698                                                 bond_ethdev_delayed_lsc_propagation,
2699                                                 (void *)bonded_eth_dev);
2700                         else
2701                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2702                                                 RTE_ETH_EVENT_INTR_LSC,
2703                                                 NULL);
2704
2705                 } else {
2706                         if (internals->link_down_delay_ms > 0)
2707                                 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2708                                                 bond_ethdev_delayed_lsc_propagation,
2709                                                 (void *)bonded_eth_dev);
2710                         else
2711                                 _rte_eth_dev_callback_process(bonded_eth_dev,
2712                                                 RTE_ETH_EVENT_INTR_LSC,
2713                                                 NULL);
2714                 }
2715         }
2716         return 0;
2717 }
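/*
 * Illustrative sketch, not part of the driver: an application can observe the
 * LSC events propagated above by registering its own callback on the bonded
 * port.  The port id value 0 and the callback name app_lsc_cb are assumptions
 * made for the example only.
 *
 *     static int
 *     app_lsc_cb(uint16_t port_id, enum rte_eth_event_type type,
 *                     void *cb_arg __rte_unused, void *ret_param __rte_unused)
 *     {
 *             struct rte_eth_link link;
 *
 *             if (type != RTE_ETH_EVENT_INTR_LSC)
 *                     return 0;
 *
 *             rte_eth_link_get_nowait(port_id, &link);
 *             printf("bonded port %u link is %s\n", port_id,
 *                             link.link_status ? "up" : "down");
 *             return 0;
 *     }
 *
 *     rte_eth_dev_callback_register(0, RTE_ETH_EVENT_INTR_LSC,
 *                     app_lsc_cb, NULL);
 */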
2718
2719 static int
2720 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2721                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2722 {
2723         unsigned i, j;
2724         int result = 0;
2725         int slave_reta_size;
2726         unsigned reta_count;
2727         struct bond_dev_private *internals = dev->data->dev_private;
2728
2729         if (reta_size != internals->reta_size)
2730                 return -EINVAL;
2731
2732          /* Copy RETA table */
2733         reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2734
2735         for (i = 0; i < reta_count; i++) {
2736                 internals->reta_conf[i].mask = reta_conf[i].mask;
2737                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2738                         if ((reta_conf[i].mask >> j) & 0x01)
2739                                 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2740         }
2741
2742         /* Fill rest of array */
2743         for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2744                 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2745                                 sizeof(internals->reta_conf[0]) * reta_count);
2746
2747         /* Propagate RETA over slaves */
2748         for (i = 0; i < internals->slave_count; i++) {
2749                 slave_reta_size = internals->slaves[i].reta_size;
2750                 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2751                                 &internals->reta_conf[0], slave_reta_size);
2752                 if (result < 0)
2753                         return result;
2754         }
2755
2756         return 0;
2757 }
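/*
 * Illustrative sketch, not part of the driver: the RETA of the bonded port is
 * updated through the generic ethdev call, which lands in
 * bond_ethdev_rss_reta_update() above and is then pushed to every slave.  The
 * variable bonded_port_id, a RETA size of 128 entries and two RX queues are
 * assumptions made for the example only.
 *
 *     struct rte_eth_rss_reta_entry64 reta_conf[128 / RTE_RETA_GROUP_SIZE];
 *     unsigned int idx;
 *
 *     memset(reta_conf, 0, sizeof(reta_conf));
 *     for (idx = 0; idx < 128; idx++) {
 *             reta_conf[idx / RTE_RETA_GROUP_SIZE].mask = UINT64_MAX;
 *             reta_conf[idx / RTE_RETA_GROUP_SIZE].reta[idx % RTE_RETA_GROUP_SIZE] =
 *                             idx % 2;
 *     }
 *     rte_eth_dev_rss_reta_update(bonded_port_id, reta_conf, 128);
 */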
2758
2759 static int
2760 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2761                 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2762 {
2763         int i, j;
2764         struct bond_dev_private *internals = dev->data->dev_private;
2765
2766         if (reta_size != internals->reta_size)
2767                 return -EINVAL;
2768
2769          /* Copy RETA table */
2770         for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2771                 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2772                         if ((reta_conf[i].mask >> j) & 0x01)
2773                                 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2774
2775         return 0;
2776 }
2777
2778 static int
2779 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2780                 struct rte_eth_rss_conf *rss_conf)
2781 {
2782         int i, result = 0;
2783         struct bond_dev_private *internals = dev->data->dev_private;
2784         struct rte_eth_rss_conf bond_rss_conf;
2785
2786         memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2787
2788         bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2789
2790         if (bond_rss_conf.rss_hf != 0)
2791                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2792
2793         if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2794                         sizeof(internals->rss_key)) {
2795                 if (bond_rss_conf.rss_key_len == 0)
2796                         bond_rss_conf.rss_key_len = 40; /* default RSS key length */
2797                 internals->rss_key_len = bond_rss_conf.rss_key_len;
2798                 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2799                                 internals->rss_key_len);
2800         }
2801
2802         for (i = 0; i < internals->slave_count; i++) {
2803                 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2804                                 &bond_rss_conf);
2805                 if (result < 0)
2806                         return result;
2807         }
2808
2809         return 0;
2810 }
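/*
 * Illustrative sketch, not part of the driver: a runtime RSS hash update on
 * the bonded port is fanned out to the slaves by bond_ethdev_rss_hash_update()
 * above.  Passing rss_key = NULL keeps the key that was programmed at
 * configure time; bonded_port_id is an assumption made for the example only.
 *
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = NULL,
 *             .rss_key_len = 0,
 *             .rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *     };
 *
 *     rte_eth_dev_rss_hash_update(bonded_port_id, &rss_conf);
 */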
2811
2812 static int
2813 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2814                 struct rte_eth_rss_conf *rss_conf)
2815 {
2816         struct bond_dev_private *internals = dev->data->dev_private;
2817
2818         rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2819         rss_conf->rss_key_len = internals->rss_key_len;
2820         if (rss_conf->rss_key)
2821                 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2822
2823         return 0;
2824 }
2825
2826 static int
2827 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2828 {
2829         struct rte_eth_dev *slave_eth_dev;
2830         struct bond_dev_private *internals = dev->data->dev_private;
2831         int ret, i;
2832
2833         rte_spinlock_lock(&internals->lock);
2834
2835         for (i = 0; i < internals->slave_count; i++) {
2836                 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2837                 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2838                         rte_spinlock_unlock(&internals->lock);
2839                         return -ENOTSUP;
2840                 }
2841         }
2842         for (i = 0; i < internals->slave_count; i++) {
2843                 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2844                 if (ret < 0) {
2845                         rte_spinlock_unlock(&internals->lock);
2846                         return ret;
2847                 }
2848         }
2849
2850         rte_spinlock_unlock(&internals->lock);
2851         return 0;
2852 }
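/*
 * Illustrative sketch, not part of the driver: an MTU change on the bonded
 * port is applied to each slave in turn by bond_ethdev_mtu_set() above;
 * -ENOTSUP is returned up front if any slave has no mtu_set handler.
 * bonded_port_id and the jumbo MTU value are assumptions for the example.
 *
 *     if (rte_eth_dev_set_mtu(bonded_port_id, 9000) != 0)
 *             printf("MTU 9000 rejected by at least one slave\n");
 */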
2853
2854 static void
2855 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2856 {
2857         if (mac_address_set(dev, addr))
2858                 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2859 }
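/*
 * Illustrative sketch, not part of the driver: the hook above is reached
 * through the generic ethdev MAC address call.  The bonded_port_id variable
 * and the locally administered MAC value are assumptions for the example.
 *
 *     struct ether_addr new_mac = {
 *             .addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }
 *     };
 *
 *     rte_eth_dev_default_mac_addr_set(bonded_port_id, &new_mac);
 */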
2860
2861 const struct eth_dev_ops default_dev_ops = {
2862         .dev_start            = bond_ethdev_start,
2863         .dev_stop             = bond_ethdev_stop,
2864         .dev_close            = bond_ethdev_close,
2865         .dev_configure        = bond_ethdev_configure,
2866         .dev_infos_get        = bond_ethdev_info,
2867         .vlan_filter_set      = bond_ethdev_vlan_filter_set,
2868         .rx_queue_setup       = bond_ethdev_rx_queue_setup,
2869         .tx_queue_setup       = bond_ethdev_tx_queue_setup,
2870         .rx_queue_release     = bond_ethdev_rx_queue_release,
2871         .tx_queue_release     = bond_ethdev_tx_queue_release,
2872         .link_update          = bond_ethdev_link_update,
2873         .stats_get            = bond_ethdev_stats_get,
2874         .stats_reset          = bond_ethdev_stats_reset,
2875         .promiscuous_enable   = bond_ethdev_promiscuous_enable,
2876         .promiscuous_disable  = bond_ethdev_promiscuous_disable,
2877         .reta_update          = bond_ethdev_rss_reta_update,
2878         .reta_query           = bond_ethdev_rss_reta_query,
2879         .rss_hash_update      = bond_ethdev_rss_hash_update,
2880         .rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2881         .mtu_set              = bond_ethdev_mtu_set,
2882         .mac_addr_set         = bond_ethdev_mac_address_set
2883 };
2884
2885 static int
2886 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2887 {
2888         const char *name = rte_vdev_device_name(dev);
2889         uint8_t socket_id = dev->device.numa_node;
2890         struct bond_dev_private *internals = NULL;
2891         struct rte_eth_dev *eth_dev = NULL;
2892         uint32_t vlan_filter_bmp_size;
2893
2894         /* now do all data allocation - for the eth_dev structure and the
2895          * internal (private) data
2896          */
2897
2898         /* reserve an ethdev entry */
2899         eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2900         if (eth_dev == NULL) {
2901                 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2902                 goto err;
2903         }
2904
2905         internals = eth_dev->data->dev_private;
2906         eth_dev->data->nb_rx_queues = (uint16_t)1;
2907         eth_dev->data->nb_tx_queues = (uint16_t)1;
2908
2909         eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2910                         socket_id);
2911         if (eth_dev->data->mac_addrs == NULL) {
2912                 RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2913                 goto err;
2914         }
2915
2916         eth_dev->dev_ops = &default_dev_ops;
2917         eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2918
2919         rte_spinlock_init(&internals->lock);
2920
2921         internals->port_id = eth_dev->data->port_id;
2922         internals->mode = BONDING_MODE_INVALID;
2923         internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2924         internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2925         internals->burst_xmit_hash = burst_xmit_l2_hash;
2926         internals->user_defined_mac = 0;
2927
2928         internals->link_status_polling_enabled = 0;
2929
2930         internals->link_status_polling_interval_ms =
2931                 DEFAULT_POLLING_INTERVAL_10_MS;
2932         internals->link_down_delay_ms = 0;
2933         internals->link_up_delay_ms = 0;
2934
2935         internals->slave_count = 0;
2936         internals->active_slave_count = 0;
2937         internals->rx_offload_capa = 0;
2938         internals->tx_offload_capa = 0;
2939         internals->candidate_max_rx_pktlen = 0;
2940         internals->max_rx_pktlen = 0;
2941
2942         /* Initially allow any RSS hash protocol to be chosen */
2943         internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2944
2945         memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2946         memset(internals->slaves, 0, sizeof(internals->slaves));
2947
2948         /* Set mode 4 default configuration */
2949         bond_mode_8023ad_setup(eth_dev, NULL);
2950         if (bond_ethdev_mode_set(eth_dev, mode)) {
2951                 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
2952                                  eth_dev->data->port_id, mode);
2953                 goto err;
2954         }
2955
2956         vlan_filter_bmp_size =
2957                 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
2958         internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
2959                                                    RTE_CACHE_LINE_SIZE);
2960         if (internals->vlan_filter_bmpmem == NULL) {
2961                 RTE_BOND_LOG(ERR,
2962                              "Failed to allocate vlan bitmap for bonded device %u",
2963                              eth_dev->data->port_id);
2964                 goto err;
2965         }
2966
2967         internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
2968                         internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
2969         if (internals->vlan_filter_bmp == NULL) {
2970                 RTE_BOND_LOG(ERR,
2971                              "Failed to init vlan bitmap for bonded device %u",
2972                              eth_dev->data->port_id);
2973                 rte_free(internals->vlan_filter_bmpmem);
2974                 goto err;
2975         }
2976
2977         return eth_dev->data->port_id;
2978
2979 err:
2980         rte_free(internals);
2981         if (eth_dev != NULL) {
2982                 rte_free(eth_dev->data->mac_addrs);
2983                 rte_eth_dev_release_port(eth_dev);
2984         }
2985         return -1;
2986 }
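/*
 * Illustrative sketch, not part of the driver: bond_alloc() is also reached
 * through the public bonding API, which applications can use instead of
 * devargs.  The device name, mode and slave port ids are assumptions for the
 * example.
 *
 *     int bonded_port_id = rte_eth_bond_create("net_bonding0",
 *                     BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
 *
 *     if (bonded_port_id < 0)
 *             rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
 *
 *     rte_eth_bond_slave_add(bonded_port_id, 0);
 *     rte_eth_bond_slave_add(bonded_port_id, 1);
 */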
2987
2988 static int
2989 bond_probe(struct rte_vdev_device *dev)
2990 {
2991         const char *name;
2992         struct bond_dev_private *internals;
2993         struct rte_kvargs *kvlist;
2994         uint8_t bonding_mode, socket_id;
2995         int  arg_count, port_id;
2996         uint8_t agg_mode;
2997
2998         if (!dev)
2999                 return -EINVAL;
3000
3001         name = rte_vdev_device_name(dev);
3002         RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3003
3004         kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3005                 pmd_bond_init_valid_arguments);
3006         if (kvlist == NULL)
3007                 return -1;
3008
3009         /* Parse link bonding mode */
3010         if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3011                 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3012                                 &bond_ethdev_parse_slave_mode_kvarg,
3013                                 &bonding_mode) != 0) {
3014                         RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3015                                         name);
3016                         goto parse_error;
3017                 }
3018         } else {
3019                 RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
3020                                 "device %s\n", name);
3021                 goto parse_error;
3022         }
3023
3024         /* Parse socket id to create bonding device on */
3025         arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3026         if (arg_count == 1) {
3027                 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3028                                 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3029                                 != 0) {
3030                         RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
3031                                         "bonded device %s\n", name);
3032                         goto parse_error;
3033                 }
3034         } else if (arg_count > 1) {
3035                 RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
3036                                 "bonded device %s\n", name);
3037                 goto parse_error;
3038         } else {
3039                 socket_id = rte_socket_id();
3040         }
3041
3042         dev->device.numa_node = socket_id;
3043
3044         /* Create link bonding eth device */
3045         port_id = bond_alloc(dev, bonding_mode);
3046         if (port_id < 0) {
3047                 RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
3048                                 "socket %u.\n", name, bonding_mode, socket_id);
3049                 goto parse_error;
3050         }
3051         internals = rte_eth_devices[port_id].data->dev_private;
3052         internals->kvlist = kvlist;
3053
3054
3055         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3056                 if (rte_kvargs_process(kvlist,
3057                                 PMD_BOND_AGG_MODE_KVARG,
3058                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3059                                 &agg_mode) != 0) {
3060                         RTE_LOG(ERR, EAL,
3061                                         "Failed to parse agg selection mode for bonded device %s\n",
3062                                         name);
3063                         goto parse_error;
3064                 }
3065
3066                 if (internals->mode == BONDING_MODE_8023AD)
3067                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3068                                         agg_mode);
3069         } else {
3070                 rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3071         }
3072
3073         RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
3074                         "socket %u.\n", name, port_id, bonding_mode, socket_id);
3075         return 0;
3076
3077 parse_error:
3078         rte_kvargs_free(kvlist);
3079
3080         return -1;
3081 }
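/*
 * Illustrative sketch, not part of the driver: bond_probe() runs for a --vdev
 * EAL argument or for a device created at runtime with the kvargs parsed
 * above.  The device name and slave PCI addresses are assumptions for the
 * example.
 *
 *     uint16_t port_id;
 *
 *     if (rte_vdev_init("net_bonding0",
 *                     "mode=2,slave=0000:01:00.0,slave=0000:01:00.1") == 0)
 *             rte_eth_dev_get_port_by_name("net_bonding0", &port_id);
 */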
3082
3083 static int
3084 bond_remove(struct rte_vdev_device *dev)
3085 {
3086         struct rte_eth_dev *eth_dev;
3087         struct bond_dev_private *internals;
3088         const char *name;
3089
3090         if (!dev)
3091                 return -EINVAL;
3092
3093         name = rte_vdev_device_name(dev);
3094         RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3095
3096         /* now free all data allocations - for the eth_dev structure
3097          * and the internal (private) data
3098          */
3099
3100         /* find an ethdev entry */
3101         eth_dev = rte_eth_dev_allocated(name);
3102         if (eth_dev == NULL)
3103                 return -ENODEV;
3104
3105         RTE_ASSERT(eth_dev->device == &dev->device);
3106
3107         internals = eth_dev->data->dev_private;
3108         if (internals->slave_count != 0)
3109                 return -EBUSY;
3110
3111         if (eth_dev->data->dev_started == 1) {
3112                 bond_ethdev_stop(eth_dev);
3113                 bond_ethdev_close(eth_dev);
3114         }
3115
3116         eth_dev->dev_ops = NULL;
3117         eth_dev->rx_pkt_burst = NULL;
3118         eth_dev->tx_pkt_burst = NULL;
3119
3120         internals = eth_dev->data->dev_private;
3121         rte_bitmap_free(internals->vlan_filter_bmp);
3122         rte_free(internals->vlan_filter_bmpmem);
3123         rte_free(eth_dev->data->dev_private);
3124         rte_free(eth_dev->data->mac_addrs);
3125
3126         rte_eth_dev_release_port(eth_dev);
3127
3128         return 0;
3129 }
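/*
 * Illustrative sketch, not part of the driver: the matching teardown can be
 * requested at runtime.  bond_remove() returns -EBUSY while slaves are still
 * attached, so they are removed first; the names and port ids are assumptions
 * for the example.
 *
 *     rte_eth_bond_slave_remove(bonded_port_id, 0);
 *     rte_eth_bond_slave_remove(bonded_port_id, 1);
 *     rte_vdev_uninit("net_bonding0");
 */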
3130
3131 /* this part will resolve the slave port ids after all the other pdevs and
3132  * vdevs have been allocated */
3133 static int
3134 bond_ethdev_configure(struct rte_eth_dev *dev)
3135 {
3136         const char *name = dev->device->name;
3137         struct bond_dev_private *internals = dev->data->dev_private;
3138         struct rte_kvargs *kvlist = internals->kvlist;
3139         int arg_count;
3140         uint16_t port_id = dev - rte_eth_devices;
3141         uint8_t agg_mode;
3142
3143         static const uint8_t default_rss_key[40] = {
3144                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3145                 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3146                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3147                 0xBE, 0xAC, 0x01, 0xFA
3148         };
3149
3150         unsigned i, j;
3151
3152         /* If RSS is enabled, fill table and key with default values */
3153         if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3154                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3155                 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3156                 memcpy(internals->rss_key, default_rss_key, 40);
3157
3158                 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3159                         internals->reta_conf[i].mask = ~0LL;
3160                         for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3161                                 internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3162                 }
3163         }
3164
3165         /* set the max_rx_pktlen */
3166         internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3167
3168         /*
3169          * if no kvlist, it means that this bonded device has been created
3170          * through the bonding api.
3171          */
3172         if (!kvlist)
3173                 return 0;
3174
3175         /* Parse MAC address for bonded device */
3176         arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3177         if (arg_count == 1) {
3178                 struct ether_addr bond_mac;
3179
3180                 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3181                                 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3182                         RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3183                                         name);
3184                         return -1;
3185                 }
3186
3187                 /* Set MAC address */
3188                 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3189                         RTE_LOG(ERR, EAL,
3190                                         "Failed to set mac address on bonded device %s\n",
3191                                         name);
3192                         return -1;
3193                 }
3194         } else if (arg_count > 1) {
3195                 RTE_LOG(ERR, EAL,
3196                                 "MAC address can be specified only once for bonded device %s\n",
3197                                 name);
3198                 return -1;
3199         }
3200
3201         /* Parse/set balance mode transmit policy */
3202         arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3203         if (arg_count == 1) {
3204                 uint8_t xmit_policy;
3205
3206                 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3207                                 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3208                                                 0) {
3209                         RTE_LOG(INFO, EAL,
3210                                         "Invalid xmit policy specified for bonded device %s\n",
3211                                         name);
3212                         return -1;
3213                 }
3214
3215                 /* Set balance mode transmit policy*/
3216                 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3217                         RTE_LOG(ERR, EAL,
3218                                         "Failed to set balance xmit policy on bonded device %s\n",
3219                                         name);
3220                         return -1;
3221                 }
3222         } else if (arg_count > 1) {
3223                 RTE_LOG(ERR, EAL,
3224                                 "Transmit policy can be specified only once for bonded device"
3225                                 " %s\n", name);
3226                 return -1;
3227         }
3228
3229         if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3230                 if (rte_kvargs_process(kvlist,
3231                                 PMD_BOND_AGG_MODE_KVARG,
3232                                 &bond_ethdev_parse_slave_agg_mode_kvarg,
3233                                 &agg_mode) != 0) {
3234                         RTE_LOG(ERR, EAL,
3235                                         "Failed to parse agg selection mode for bonded device %s\n",
3236                                         name);
3237                 }
3238                 if (internals->mode == BONDING_MODE_8023AD)
3239                         rte_eth_bond_8023ad_agg_selection_set(port_id,
3240                                         agg_mode);
3241         }
3242
3243         /* Parse/add slave ports to bonded device */
3244         if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3245                 struct bond_ethdev_slave_ports slave_ports;
3246                 unsigned i;
3247
3248                 memset(&slave_ports, 0, sizeof(slave_ports));
3249
3250                 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3251                                 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3252                         RTE_LOG(ERR, EAL,
3253                                         "Failed to parse slave ports for bonded device %s\n",
3254                                         name);
3255                         return -1;
3256                 }
3257
3258                 for (i = 0; i < slave_ports.slave_count; i++) {
3259                         if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3260                                 RTE_LOG(ERR, EAL,
3261                                                 "Failed to add port %d as slave to bonded device %s\n",
3262                                                 slave_ports.slaves[i], name);
3263                         }
3264                 }
3265
3266         } else {
3267                 RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3268                 return -1;
3269         }
3270
3271         /* Parse/set primary slave port id*/
3272         arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3273         if (arg_count == 1) {
3274                 uint16_t primary_slave_port_id;
3275
3276                 if (rte_kvargs_process(kvlist,
3277                                 PMD_BOND_PRIMARY_SLAVE_KVARG,
3278                                 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3279                                 &primary_slave_port_id) < 0) {
3280                         RTE_LOG(INFO, EAL,
3281                                         "Invalid primary slave port id specified for bonded device"
3282                                         " %s\n", name);
3283                         return -1;
3284                 }
3285
3286                 /* Set primary slave port id */
3287                 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3288                                 != 0) {
3289                         RTE_LOG(ERR, EAL,
3290                                         "Failed to set primary slave port %d on bonded device %s\n",
3291                                         primary_slave_port_id, name);
3292                         return -1;
3293                 }
3294         } else if (arg_count > 1) {
3295                 RTE_LOG(INFO, EAL,
3296                                 "Primary slave can be specified only once for bonded device"
3297                                 " %s\n", name);
3298                 return -1;
3299         }
3300
3301         /* Parse link status monitor polling interval */
3302         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3303         if (arg_count == 1) {
3304                 uint32_t lsc_poll_interval_ms;
3305
3306                 if (rte_kvargs_process(kvlist,
3307                                 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3308                                 &bond_ethdev_parse_time_ms_kvarg,
3309                                 &lsc_poll_interval_ms) < 0) {
3310                         RTE_LOG(INFO, EAL,
3311                                         "Invalid lsc polling interval value specified for bonded"
3312                                         " device %s\n", name);
3313                         return -1;
3314                 }
3315
3316                 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3317                                 != 0) {
3318                         RTE_LOG(ERR, EAL,
3319                                         "Failed to set lsc monitor polling interval (%u ms) on"
3320                                         " bonded device %s\n", lsc_poll_interval_ms, name);
3321                         return -1;
3322                 }
3323         } else if (arg_count > 1) {
3324                 RTE_LOG(INFO, EAL,
3325                                 "LSC polling interval can be specified only once for bonded"
3326                                 " device %s\n", name);
3327                 return -1;
3328         }
3329
3330         /* Parse link up interrupt propagation delay */
3331         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3332         if (arg_count == 1) {
3333                 uint32_t link_up_delay_ms;
3334
3335                 if (rte_kvargs_process(kvlist,
3336                                 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3337                                 &bond_ethdev_parse_time_ms_kvarg,
3338                                 &link_up_delay_ms) < 0) {
3339                         RTE_LOG(INFO, EAL,
3340                                         "Invalid link up propagation delay value specified for"
3341                                         " bonded device %s\n", name);
3342                         return -1;
3343                 }
3344
3345                 /* Set link up propagation delay */
3346                 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3347                                 != 0) {
3348                         RTE_LOG(ERR, EAL,
3349                                         "Failed to set link up propagation delay (%u ms) on bonded"
3350                                         " device %s\n", link_up_delay_ms, name);
3351                         return -1;
3352                 }
3353         } else if (arg_count > 1) {
3354                 RTE_LOG(INFO, EAL,
3355                                 "Link up propagation delay can be specified only once for"
3356                                 " bonded device %s\n", name);
3357                 return -1;
3358         }
3359
3360         /* Parse link down interrupt propagation delay */
3361         arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3362         if (arg_count == 1) {
3363                 uint32_t link_down_delay_ms;
3364
3365                 if (rte_kvargs_process(kvlist,
3366                                 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3367                                 &bond_ethdev_parse_time_ms_kvarg,
3368                                 &link_down_delay_ms) < 0) {
3369                         RTE_LOG(INFO, EAL,
3370                                         "Invalid link down propagation delay value specified for"
3371                                         " bonded device %s\n", name);
3372                         return -1;
3373                 }
3374
3375                 /* Set link down propagation delay */
3376                 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3377                                 != 0) {
3378                         RTE_LOG(ERR, EAL,
3379                                         "Failed to set link down propagation delay (%u ms) on"
3380                                         " bonded device %s\n", link_down_delay_ms, name);
3381                         return -1;
3382                 }
3383         } else if (arg_count > 1) {
3384                 RTE_LOG(INFO, EAL,
3385                                 "Link down propagation delay can be specified only once for"
3386                                 " bonded device %s\n", name);
3387                 return -1;
3388         }
3389
3390         return 0;
3391 }
3392
3393 struct rte_vdev_driver pmd_bond_drv = {
3394         .probe = bond_probe,
3395         .remove = bond_remove,
3396 };
3397
3398 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3399 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3400
3401 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3402         "slave=<ifc> "
3403         "primary=<ifc> "
3404         "mode=[0-6] "
3405         "xmit_policy=[l2 | l23 | l34] "
3406         "agg_mode=[count | stable | bandwidth] "
3407         "socket_id=<int> "
3408         "mac=<mac addr> "
3409         "lsc_poll_period_ms=<int> "
3410         "up_delay=<int> "
3411         "down_delay=<int>");
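/*
 * Illustrative sketch, not part of the driver: a devargs string combining the
 * parameters registered above, as it could be passed to an application such
 * as testpmd.  The core mask, PCI addresses and parameter values are
 * assumptions for the example.
 *
 *     testpmd -l 0-3 -n 4 \
 *         --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1,agg_mode=stable,lsc_poll_period_ms=100' \
 *         -- --port-topology=chained
 */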