New upstream version 17.11.1
[deb_dpdk.git] / drivers / net / bonding / rte_eth_bond_8023ad.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stddef.h>
35 #include <string.h>
36 #include <stdbool.h>
37
38 #include <rte_alarm.h>
39 #include <rte_malloc.h>
40 #include <rte_errno.h>
41 #include <rte_cycles.h>
42 #include <rte_compat.h>
43
44 #include "rte_eth_bond_private.h"
45
46 static void bond_mode_8023ad_ext_periodic_cb(void *arg);
47 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
48 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
49                         bond_dbg_get_time_diff_ms(), slave_id, \
50                         __func__, ##__VA_ARGS__)
51
52 static uint64_t start_time;
53
54 static unsigned
55 bond_dbg_get_time_diff_ms(void)
56 {
57         uint64_t now;
58
59         now = rte_rdtsc();
60         if (start_time == 0)
61                 start_time = now;
62
63         return ((now - start_time) * 1000) / rte_get_tsc_hz();
64 }
65
66 static void
67 bond_print_lacp(struct lacpdu *l)
68 {
69         char a_address[18];
70         char p_address[18];
71         char a_state[256] = { 0 };
72         char p_state[256] = { 0 };
73
74         static const char * const state_labels[] = {
75                 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
76         };
77
78         int a_len = 0;
79         int p_len = 0;
80         uint8_t i;
81         uint8_t *addr;
82
83         addr = l->actor.port_params.system.addr_bytes;
84         snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
85                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
86
87         addr = l->partner.port_params.system.addr_bytes;
88         snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
89                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
90
91         for (i = 0; i < 8; i++) {
92                 if ((l->actor.state >> i) & 1) {
93                         a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
94                                 state_labels[i]);
95                 }
96
97                 if ((l->partner.state >> i) & 1) {
98                         p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
99                                 state_labels[i]);
100                 }
101         }
102
103         if (a_len && a_state[a_len-1] == ' ')
104                 a_state[a_len-1] = '\0';
105
106         if (p_len && p_state[p_len-1] == ' ')
107                 p_state[p_len-1] = '\0';
108
109         RTE_LOG(DEBUG, PMD, "LACP: {\n"\
110                         "  subtype= %02X\n"\
111                         "  ver_num=%02X\n"\
112                         "  actor={ tlv=%02X, len=%02X\n"\
113                         "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
114                         "       state={ %s }\n"\
115                         "  }\n"\
116                         "  partner={ tlv=%02X, len=%02X\n"\
117                         "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
118                         "       state={ %s }\n"\
119                         "  }\n"\
120                         "  collector={info=%02X, length=%02X, max_delay=%04X\n, " \
121                                                         "type_term=%02X, terminator_length = %02X}\n",\
122                         l->subtype,\
123                         l->version_number,\
124                         l->actor.tlv_type_info,\
125                         l->actor.info_length,\
126                         l->actor.port_params.system_priority,\
127                         a_address,\
128                         l->actor.port_params.key,\
129                         l->actor.port_params.port_priority,\
130                         l->actor.port_params.port_number,\
131                         a_state,\
132                         l->partner.tlv_type_info,\
133                         l->partner.info_length,\
134                         l->partner.port_params.system_priority,\
135                         p_address,\
136                         l->partner.port_params.key,\
137                         l->partner.port_params.port_priority,\
138                         l->partner.port_params.port_number,\
139                         p_state,\
140                         l->tlv_type_collector_info,\
141                         l->collector_info_length,\
142                         l->collector_max_delay,\
143                         l->tlv_type_terminator,\
144                         l->terminator_length);
145
146 }
147 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
148 #else
149 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
150 #define MODE4_DEBUG(fmt, ...) do { } while (0)
151 #endif
152
/* Destination MAC address (01:80:C2:00:00:02) that tx_machine() writes into
 * the Ethernet header of every LACPDU it builds. */
static const struct ether_addr lacp_mac_addr = {
	.addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
};
156
/* Mode 4 state of every port. The state machines in this file index this
 * table with a slave port id or with a port's aggregator_port_id. */
struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
158
/* Stop a timer. The value 0 is the "stopped" marker that timer_is_stopped()
 * tests for. */
static void
timer_cancel(uint64_t *timer)
{
	const uint64_t stopped_marker = 0;

	*timer = stopped_marker;
}
164
/* Arm a timer so that its deadline lies timeout TSC cycles after the current
 * TSC value. */
static void
timer_set(uint64_t *timer, uint64_t timeout)
{
	const uint64_t tsc_now = rte_rdtsc();

	*timer = tsc_now + timeout;
}
170
/* Put the given timer into the expired state by arming it with a deadline
 * equal to the current TSC value (a zero length timeout). */
static void
timer_force_expired(uint64_t *timer)
{
	timer_set(timer, 0);
}
177
/* Tell whether a timer is stopped, i.e. holds the marker value 0 written by
 * timer_cancel(). */
static bool
timer_is_stopped(uint64_t *timer)
{
	const uint64_t deadline = *timer;

	return deadline == 0;
}
183
/* Tell whether the timer's deadline lies strictly before the current TSC
 * value. A stopped timer (value 0) is reported as expired as well. */
static bool
timer_is_expired(uint64_t *timer)
{
	const uint64_t tsc_now = rte_rdtsc();

	return *timer < tsc_now;
}
189
/* A timer is running when it has been armed (is not stopped) and its
 * deadline has not passed yet. */
static bool
timer_is_running(uint64_t *timer)
{
	return !(timer_is_stopped(timer) || timer_is_expired(timer));
}
196
197 static void
198 set_warning_flags(struct port *port, uint16_t flags)
199 {
200         int retval;
201         uint16_t old;
202         uint16_t new_flag = 0;
203
204         do {
205                 old = port->warnings_to_show;
206                 new_flag = old | flags;
207                 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
208         } while (unlikely(retval == 0));
209 }
210
211 static void
212 show_warnings(uint16_t slave_id)
213 {
214         struct port *port = &mode_8023ad_ports[slave_id];
215         uint8_t warnings;
216
217         do {
218                 warnings = port->warnings_to_show;
219         } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
220
221         if (!warnings)
222                 return;
223
224         if (!timer_is_expired(&port->warning_timer))
225                 return;
226
227
228         timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
229                         rte_get_tsc_hz() / 1000);
230
231         if (warnings & WRN_RX_QUEUE_FULL) {
232                 RTE_LOG(DEBUG, PMD,
233                         "Slave %u: failed to enqueue LACP packet into RX ring.\n"
234                         "Receive and transmit functions must be invoked on bonded\n"
235                         "interface at least 10 times per second or LACP will not\n"
236                         "work correctly\n", slave_id);
237         }
238
239         if (warnings & WRN_TX_QUEUE_FULL) {
240                 RTE_LOG(DEBUG, PMD,
241                         "Slave %u: failed to enqueue LACP packet into TX ring.\n"
242                         "Receive and transmit functions must be invoked on bonded\n"
243                         "interface at least 10 times per second or LACP will not\n"
244                         "work correctly\n", slave_id);
245         }
246
247         if (warnings & WRN_RX_MARKER_TO_FAST)
248                 RTE_LOG(INFO, PMD, "Slave %u: marker to early - ignoring.\n", slave_id);
249
250         if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
251                 RTE_LOG(INFO, PMD,
252                         "Slave %u: ignoring unknown slow protocol frame type", slave_id);
253         }
254
255         if (warnings & WRN_UNKNOWN_MARKER_TYPE)
256                 RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type", slave_id);
257
258         if (warnings & WRN_NOT_LACP_CAPABLE)
259                 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
260 }
261
/*
 * Put the port's view of its partner into the defaulted state: the partner
 * state is overwritten with STATE_LACP_ACTIVE only and the actor's DEFAULTED
 * bit is set. The stored partner parameters (port->partner) are not touched.
 */
static void
record_default(struct port *port)
{
	/* Record default parameters for partner. Partner admin parameters
	 * are not implemented, so the last known partner parameters are kept
	 * and the actor is marked to show that the partner is defaulted. */
	port->partner_state = STATE_LACP_ACTIVE;
	ACTOR_STATE_SET(port, DEFAULTED);
}
271
/**
 * Function handles rx state machine.
 *
 * This function implements Receive State Machine from point 5.4.12 in
 * 802.1AX documentation. It should be called periodically.
 *
 * @param internals	Bonding device private data; the mode 4 short and
 *			long timeouts are read from it.
 * @param slave_id	Port id of the slave; index into mode_8023ad_ports.
 * @param lacp		LACPDU received on the slave, or NULL when there is
 *			no LACPDU to process in this call.
 */
static void
rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
		struct lacpdu *lacp)
{
	struct port *agg, *port = &mode_8023ad_ports[slave_id];
	uint64_t timeout;

	if (SM_FLAG(port, BEGIN)) {
		/* INITIALIZE: reset selection, partner info and timers */
		MODE4_DEBUG("-> INITIALIZE\n");
		SM_FLAG_CLR(port, MOVED);
		port->selected = UNSELECTED;

		record_default(port);

		ACTOR_STATE_CLR(port, EXPIRED);
		timer_cancel(&port->current_while_timer);

		/* DISABLED: On initialization partner is out of sync */
		PARTNER_STATE_CLR(port, SYNCHRONIZATION);

		/* LACP DISABLED: partner can aggregate only if LACP is enabled
		 * on this port */
		if (!SM_FLAG(port, LACP_ENABLED))
			PARTNER_STATE_CLR(port, AGGREGATION);
		else
			PARTNER_STATE_SET(port, AGGREGATION);
	}

	if (!SM_FLAG(port, LACP_ENABLED)) {
		/* Update parameters only if state changed: a stopped
		 * current_while_timer means this was already done */
		if (!timer_is_stopped(&port->current_while_timer)) {
			port->selected = UNSELECTED;
			record_default(port);
			PARTNER_STATE_CLR(port, AGGREGATION);
			ACTOR_STATE_CLR(port, EXPIRED);
			timer_cancel(&port->current_while_timer);
		}
		return;
	}

	if (lacp) {
		MODE4_DEBUG("LACP -> CURRENT\n");
		BOND_PRINT_LACP(lacp);
		/* Update selected flag. If partner parameters are defaulted assume
		 * they match. If not defaulted, compare the LACPDU actor parameters
		 * with this port's recorded partner parameters. */
		if (!ACTOR_STATE(port, DEFAULTED) &&
			(ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
			|| memcmp(&port->partner, &lacp->actor.port_params,
				sizeof(port->partner)) != 0)) {
			MODE4_DEBUG("selected <- UNSELECTED\n");
			port->selected = UNSELECTED;
		}

		/* Record this PDU actor params as partner params */
		memcpy(&port->partner, &lacp->actor.port_params,
			sizeof(struct port_params));
		port->partner_state = lacp->actor.state;

		/* Partner parameters are not defaulted any more */
		ACTOR_STATE_CLR(port, DEFAULTED);

		/* Check whether the partner's view of us (the LACPDU partner
		 * parameters) matches this port's actor parameters. The system
		 * address is taken from the port's aggregator. */
		agg = &mode_8023ad_ports[port->aggregator_port_id];
		bool match = port->actor.system_priority ==
			lacp->partner.port_params.system_priority &&
			is_same_ether_addr(&agg->actor.system,
			&lacp->partner.port_params.system) &&
			port->actor.port_priority ==
			lacp->partner.port_params.port_priority &&
			port->actor.port_number ==
			lacp->partner.port_params.port_number;

		/* Update NTT if the partner's information is outdated (xored and
		 * masked bits are set) */
		uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
			STATE_SYNCHRONIZATION | STATE_AGGREGATION;

		if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
				match == false) {
			SM_FLAG_SET(port, NTT);
		}

		/* Partner is in sync when its view of us matches and both sides
		 * agree on AGGREGATION, or when only the actor has AGGREGATION
		 * set; otherwise it is out of sync */
		if (match == true && ACTOR_STATE(port, AGGREGATION) ==
				PARTNER_STATE(port,	AGGREGATION))
			PARTNER_STATE_SET(port, SYNCHRONIZATION);
		else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
				AGGREGATION))
			PARTNER_STATE_SET(port, SYNCHRONIZATION);
		else
			PARTNER_STATE_CLR(port, SYNCHRONIZATION);

		/* Restart current_while_timer with the timeout selected by the
		 * actor's LACP_SHORT_TIMEOUT bit */
		if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
			timeout = internals->mode4.short_timeout;
		else
			timeout = internals->mode4.long_timeout;

		timer_set(&port->current_while_timer, timeout);
		ACTOR_STATE_CLR(port, EXPIRED);
		return; /* No state change */
	}

	/* If CURRENT state timer is not running (stopped or expired)
	 * transit to EXPIRED state from DISABLED or CURRENT */
	if (!timer_is_running(&port->current_while_timer)) {
		ACTOR_STATE_SET(port, EXPIRED);
		PARTNER_STATE_CLR(port, SYNCHRONIZATION);
		PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
		timer_set(&port->current_while_timer, internals->mode4.short_timeout);
	}
}
392
393 /**
394  * Function handles periodic tx state machine.
395  *
396  * Function implements Periodic Transmission state machine from point 5.4.13
397  * in 802.1AX documentation. It should be called periodically.
398  *
399  * @param port                  Port to handle state machine.
400  */
401 static void
402 periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
403 {
404         struct port *port = &mode_8023ad_ports[slave_id];
405         /* Calculate if either site is LACP enabled */
406         uint64_t timeout;
407         uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
408                 PARTNER_STATE(port, LACP_ACTIVE);
409
410         uint8_t is_partner_fast, was_partner_fast;
411         /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */
412         if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
413                 timer_cancel(&port->periodic_timer);
414                 timer_force_expired(&port->tx_machine_timer);
415                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
416
417                 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
418                         SM_FLAG(port, BEGIN) ? "begind " : "",
419                         SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
420                         active ? "LACP active " : "LACP pasive ");
421                 return;
422         }
423
424         is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
425         was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
426
427         /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW.
428          * Other case: check if timer expire or partners settings changed. */
429         if (!timer_is_stopped(&port->periodic_timer)) {
430                 if (timer_is_expired(&port->periodic_timer)) {
431                         SM_FLAG_SET(port, NTT);
432                 } else if (is_partner_fast != was_partner_fast) {
433                         /* Partners timeout  was slow and now it is fast -> send LACP.
434                          * In other case (was fast and now it is slow) just switch
435                          * timeout to slow without forcing send of LACP (because standard
436                          * say so)*/
437                         if (is_partner_fast)
438                                 SM_FLAG_SET(port, NTT);
439                 } else
440                         return; /* Nothing changed */
441         }
442
443         /* Handle state transition to FAST/SLOW LACP timeout */
444         if (is_partner_fast) {
445                 timeout = internals->mode4.fast_periodic_timeout;
446                 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
447         } else {
448                 timeout = internals->mode4.slow_periodic_timeout;
449                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
450         }
451
452         timer_set(&port->periodic_timer, timeout);
453 }
454
/**
 * Function handles mux state machine.
 *
 * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
 * It should be called periodically.
 *
 * @param internals	Bonding device private data; the aggregate wait
 *			timeout and the bonded port id (for logging) are
 *			read from it.
 * @param slave_id	Port id of the slave; index into mode_8023ad_ports.
 */
static void
mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
{
	struct port *port = &mode_8023ad_ports[slave_id];

	/* Actor state bits that are cleared when the mux is detached */
	const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
		STATE_COLLECTING;

	/* Enter DETACHED state on BEGIN condition or from any other state if
	 * port was unselected */
	if (SM_FLAG(port, BEGIN) ||
			port->selected == UNSELECTED || (port->selected == STANDBY &&
				(port->actor_state & state_mask) != 0)) {
		/* detach mux from aggregator */
		port->actor_state &= ~state_mask;
		/* Set ntt to true on BEGIN condition or on transition from any
		 * other state, which is indicated by wait_while_timer having been
		 * started */
		if (SM_FLAG(port, BEGIN) ||
				!timer_is_stopped(&port->wait_while_timer)) {
			SM_FLAG_SET(port, NTT);
			MODE4_DEBUG("-> DETACHED\n");
		}
		timer_cancel(&port->wait_while_timer);
	}

	/* DETACHED: start waiting once the port is SELECTED or STANDBY */
	if (timer_is_stopped(&port->wait_while_timer)) {
		if (port->selected == SELECTED || port->selected == STANDBY) {
			timer_set(&port->wait_while_timer,
				internals->mode4.aggregate_wait_timeout);

			MODE4_DEBUG("DETACHED -> WAITING\n");
		}
		/* Waiting state entered */
		return;
	}

	/* Transit to the next state only once wait_while_timer has expired */
	if (!timer_is_expired(&port->wait_while_timer))
		return;

	if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
		!PARTNER_STATE(port, SYNCHRONIZATION)) {
		/* If in COLLECTING or DISTRIBUTING state and partner becomes out of
		 * sync transit to ATTACHED state.  */
		ACTOR_STATE_CLR(port, DISTRIBUTING);
		ACTOR_STATE_CLR(port, COLLECTING);
		/* Clear actor sync to activate the transition to ATTACHED in the
		 * condition below */
		ACTOR_STATE_CLR(port, SYNCHRONIZATION);
		MODE4_DEBUG("Out of sync -> ATTACHED\n");
	}

	if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
		/* attach mux to aggregator */
		RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
			STATE_DISTRIBUTING)) == 0);

		ACTOR_STATE_SET(port, SYNCHRONIZATION);
		SM_FLAG_SET(port, NTT);
		MODE4_DEBUG("ATTACHED Entered\n");
	} else if (!ACTOR_STATE(port, COLLECTING)) {
		/* Start collecting if in sync */
		if (PARTNER_STATE(port, SYNCHRONIZATION)) {
			MODE4_DEBUG("ATTACHED -> COLLECTING\n");
			ACTOR_STATE_SET(port, COLLECTING);
			SM_FLAG_SET(port, NTT);
		}
	} else if (ACTOR_STATE(port, COLLECTING)) {
		/* Check if partner is in COLLECTING state. If so this port can
		 * distribute frames to it */
		if (!ACTOR_STATE(port, DISTRIBUTING)) {
			if (PARTNER_STATE(port, COLLECTING)) {
				/* Enable  DISTRIBUTING if partner is collecting */
				ACTOR_STATE_SET(port, DISTRIBUTING);
				SM_FLAG_SET(port, NTT);
				MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
				RTE_LOG(INFO, PMD,
					"Bond %u: slave id %u distributing started.\n",
					internals->port_id, slave_id);
			}
		} else {
			if (!PARTNER_STATE(port, COLLECTING)) {
				/* Disable DISTRIBUTING (enter COLLECTING state) if partner
				 * is not collecting */
				ACTOR_STATE_CLR(port, DISTRIBUTING);
				SM_FLAG_SET(port, NTT);
				MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
				RTE_LOG(INFO, PMD,
					"Bond %u: slave id %u distributing stopped.\n",
					internals->port_id, slave_id);
			}
		}
	}
}
557
558 /**
559  * Function handles transmit state machine.
560  *
561  * Function implements Transmit Machine from point 5.4.16 in 802.1AX
562  * documentation.
563  *
564  * @param port
565  */
566 static void
567 tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
568 {
569         struct port *agg, *port = &mode_8023ad_ports[slave_id];
570
571         struct rte_mbuf *lacp_pkt = NULL;
572         struct lacpdu_header *hdr;
573         struct lacpdu *lacpdu;
574
575         /* If periodic timer is not running periodic machine is in NO PERIODIC and
576          * according to 802.3ax standard tx machine should not transmit any frames
577          * and set ntt to false. */
578         if (timer_is_stopped(&port->periodic_timer))
579                 SM_FLAG_CLR(port, NTT);
580
581         if (!SM_FLAG(port, NTT))
582                 return;
583
584         if (!timer_is_expired(&port->tx_machine_timer))
585                 return;
586
587         lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
588         if (lacp_pkt == NULL) {
589                 RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
590                 return;
591         }
592
593         lacp_pkt->data_len = sizeof(*hdr);
594         lacp_pkt->pkt_len = sizeof(*hdr);
595
596         hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
597
598         /* Source and destination MAC */
599         ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
600         rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
601         hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
602
603         lacpdu = &hdr->lacpdu;
604         memset(lacpdu, 0, sizeof(*lacpdu));
605
606         /* Initialize LACP part */
607         lacpdu->subtype = SLOW_SUBTYPE_LACP;
608         lacpdu->version_number = 1;
609
610         /* ACTOR */
611         lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
612         lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
613         memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
614                         sizeof(port->actor));
615         agg = &mode_8023ad_ports[port->aggregator_port_id];
616         ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
617         lacpdu->actor.state = port->actor_state;
618
619         /* PARTNER */
620         lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
621         lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
622         memcpy(&lacpdu->partner.port_params, &port->partner,
623                         sizeof(struct port_params));
624         lacpdu->partner.state = port->partner_state;
625
626         /* Other fields */
627         lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
628         lacpdu->collector_info_length = 0x10;
629         lacpdu->collector_max_delay = 0;
630
631         lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
632         lacpdu->terminator_length = 0;
633
634         MODE4_DEBUG("Sending LACP frame\n");
635         BOND_PRINT_LACP(lacpdu);
636
637         if (internals->mode4.dedicated_queues.enabled == 0) {
638                 int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt);
639                 if (retval != 0) {
640                         /* If TX ring full, drop packet and free message.
641                            Retransmission will happen in next function call. */
642                         rte_pktmbuf_free(lacp_pkt);
643                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
644                         return;
645                 }
646         } else {
647                 uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
648                                 internals->mode4.dedicated_queues.tx_qid,
649                                 &lacp_pkt, 1);
650                 if (pkts_sent != 1) {
651                         rte_pktmbuf_free(lacp_pkt);
652                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
653                         return;
654                 }
655         }
656
657
658         timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
659         SM_FLAG_CLR(port, NTT);
660 }
661
/*
 * Return the index of the largest of the first n elements of a. When the
 * maximum occurs several times the lowest such index is returned. For
 * n <= 0 the result is -1 converted to uint8_t (255). The index is truncated
 * to 8 bits by the return type.
 */
static uint8_t
max_index(uint64_t *a, int n)
{
	int best = 0;
	int pos;

	if (n <= 0)
		return (uint8_t)-1;

	for (pos = 1; pos < n; pos++) {
		/* strict comparison keeps the first of several equal maxima */
		if (a[pos] > a[best])
			best = pos;
	}

	return best;
}
680
681 /**
682  * Function assigns port to aggregator.
683  *
684  * @param bond_dev_private      Pointer to bond_dev_private structure.
685  * @param port_pos                      Port to assign.
686  */
687 static void
688 selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
689 {
690         struct port *agg, *port;
691         uint16_t slaves_count, new_agg_id, i, j = 0;
692         uint16_t *slaves;
693         uint64_t agg_bandwidth[8] = {0};
694         uint64_t agg_count[8] = {0};
695         uint16_t default_slave = 0;
696         uint8_t mode_count_id, mode_band_id;
697         struct rte_eth_link link_info;
698
699         slaves = internals->active_slaves;
700         slaves_count = internals->active_slave_count;
701         port = &mode_8023ad_ports[slave_id];
702
703         /* Search for aggregator suitable for this port */
704         for (i = 0; i < slaves_count; ++i) {
705                 agg = &mode_8023ad_ports[slaves[i]];
706                 /* Skip ports that are not aggreagators */
707                 if (agg->aggregator_port_id != slaves[i])
708                         continue;
709
710                 agg_count[agg->aggregator_port_id] += 1;
711                 rte_eth_link_get_nowait(slaves[i], &link_info);
712                 agg_bandwidth[agg->aggregator_port_id] += link_info.link_speed;
713
714                 /* Actors system ID is not checked since all slave device have the same
715                  * ID (MAC address). */
716                 if ((agg->actor.key == port->actor.key &&
717                         agg->partner.system_priority == port->partner.system_priority &&
718                         is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
719                         && (agg->partner.key == port->partner.key)) &&
720                         is_zero_ether_addr(&port->partner.system) != 1 &&
721                         (agg->actor.key &
722                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
723
724                         if (j == 0)
725                                 default_slave = i;
726                         j++;
727                 }
728         }
729
730         switch (internals->mode4.agg_selection) {
731         case AGG_COUNT:
732                 mode_count_id = max_index(
733                                 (uint64_t *)agg_count, slaves_count);
734                 new_agg_id = mode_count_id;
735                 break;
736         case AGG_BANDWIDTH:
737                 mode_band_id = max_index(
738                                 (uint64_t *)agg_bandwidth, slaves_count);
739                 new_agg_id = mode_band_id;
740                 break;
741         case AGG_STABLE:
742                 if (default_slave == slaves_count)
743                         new_agg_id = slave_id;
744                 else
745                         new_agg_id = slaves[default_slave];
746                 break;
747         default:
748                 if (default_slave == slaves_count)
749                         new_agg_id = slave_id;
750                 else
751                         new_agg_id = slaves[default_slave];
752                 break;
753         }
754
755         if (new_agg_id != port->aggregator_port_id) {
756                 port->aggregator_port_id = new_agg_id;
757
758                 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
759                         "\t%s aggregator ID=%3u\n",
760                         port->aggregator_port_id,
761                         port->aggregator_port_id == slave_id ?
762                                 "aggregator not found, using default" : "aggregator found",
763                         port->aggregator_port_id);
764         }
765
766         port->selected = SELECTED;
767 }
768
769 /* Function maps DPDK speed to bonding speed stored in key field */
770 static uint16_t
771 link_speed_key(uint16_t speed) {
772         uint16_t key_speed;
773
774         switch (speed) {
775         case ETH_SPEED_NUM_NONE:
776                 key_speed = 0x00;
777                 break;
778         case ETH_SPEED_NUM_10M:
779                 key_speed = BOND_LINK_SPEED_KEY_10M;
780                 break;
781         case ETH_SPEED_NUM_100M:
782                 key_speed = BOND_LINK_SPEED_KEY_100M;
783                 break;
784         case ETH_SPEED_NUM_1G:
785                 key_speed = BOND_LINK_SPEED_KEY_1000M;
786                 break;
787         case ETH_SPEED_NUM_10G:
788                 key_speed = BOND_LINK_SPEED_KEY_10G;
789                 break;
790         case ETH_SPEED_NUM_20G:
791                 key_speed = BOND_LINK_SPEED_KEY_20G;
792                 break;
793         case ETH_SPEED_NUM_40G:
794                 key_speed = BOND_LINK_SPEED_KEY_40G;
795                 break;
796         default:
797                 /* Unknown speed*/
798                 key_speed = 0xFFFF;
799         }
800
801         return key_speed;
802 }
803
804 static void
805 rx_machine_update(struct bond_dev_private *internals, uint8_t slave_id,
806                 struct rte_mbuf *lacp_pkt) {
807         struct lacpdu_header *lacp;
808
809         if (lacp_pkt != NULL) {
810                 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
811                 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
812
813                 /* This is LACP frame so pass it to rx_machine */
814                 rx_machine(internals, slave_id, &lacp->lacpdu);
815                 rte_pktmbuf_free(lacp_pkt);
816         } else
817                 rx_machine(internals, slave_id, NULL);
818 }
819
820 static void
821 bond_mode_8023ad_periodic_cb(void *arg)
822 {
823         struct rte_eth_dev *bond_dev = arg;
824         struct bond_dev_private *internals = bond_dev->data->dev_private;
825         struct port *port;
826         struct rte_eth_link link_info;
827         struct ether_addr slave_addr;
828         struct rte_mbuf *lacp_pkt = NULL;
829
830         uint8_t i, slave_id;
831
832
833         /* Update link status on each port */
834         for (i = 0; i < internals->active_slave_count; i++) {
835                 uint16_t key;
836
837                 slave_id = internals->active_slaves[i];
838                 rte_eth_link_get_nowait(slave_id, &link_info);
839                 rte_eth_macaddr_get(slave_id, &slave_addr);
840
841                 if (link_info.link_status != 0) {
842                         key = link_speed_key(link_info.link_speed) << 1;
843                         if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
844                                 key |= BOND_LINK_FULL_DUPLEX_KEY;
845                 } else
846                         key = 0;
847
848                 port = &mode_8023ad_ports[slave_id];
849
850                 key = rte_cpu_to_be_16(key);
851                 if (key != port->actor.key) {
852                         if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
853                                 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
854
855                         port->actor.key = key;
856                         SM_FLAG_SET(port, NTT);
857                 }
858
859                 if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
860                         ether_addr_copy(&slave_addr, &port->actor.system);
861                         if (port->aggregator_port_id == slave_id)
862                                 SM_FLAG_SET(port, NTT);
863                 }
864         }
865
866         for (i = 0; i < internals->active_slave_count; i++) {
867                 slave_id = internals->active_slaves[i];
868                 port = &mode_8023ad_ports[slave_id];
869
870                 if ((port->actor.key &
871                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
872
873                         SM_FLAG_SET(port, BEGIN);
874
875                         /* LACP is disabled on half duples or link is down */
876                         if (SM_FLAG(port, LACP_ENABLED)) {
877                                 /* If port was enabled set it to BEGIN state */
878                                 SM_FLAG_CLR(port, LACP_ENABLED);
879                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
880                                 ACTOR_STATE_CLR(port, COLLECTING);
881                         }
882
883                         /* Skip this port processing */
884                         continue;
885                 }
886
887                 SM_FLAG_SET(port, LACP_ENABLED);
888
889                 if (internals->mode4.dedicated_queues.enabled == 0) {
890                         /* Find LACP packet to this port. Do not check subtype,
891                          * it is done in function that queued packet
892                          */
893                         int retval = rte_ring_dequeue(port->rx_ring,
894                                         (void **)&lacp_pkt);
895
896                         if (retval != 0)
897                                 lacp_pkt = NULL;
898
899                         rx_machine_update(internals, slave_id, lacp_pkt);
900                 } else {
901                         uint16_t rx_count = rte_eth_rx_burst(slave_id,
902                                         internals->mode4.dedicated_queues.rx_qid,
903                                         &lacp_pkt, 1);
904
905                         if (rx_count == 1)
906                                 bond_mode_8023ad_handle_slow_pkt(internals,
907                                                 slave_id, lacp_pkt);
908                         else
909                                 rx_machine_update(internals, slave_id, NULL);
910                 }
911
912                 periodic_machine(internals, slave_id);
913                 mux_machine(internals, slave_id);
914                 tx_machine(internals, slave_id);
915                 selection_logic(internals, slave_id);
916
917                 SM_FLAG_CLR(port, BEGIN);
918                 show_warnings(slave_id);
919         }
920
921         rte_eal_alarm_set(internals->mode4.update_timeout_us,
922                         bond_mode_8023ad_periodic_cb, arg);
923 }
924
925 void
926 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
927                                 uint16_t slave_id)
928 {
929         struct bond_dev_private *internals = bond_dev->data->dev_private;
930
931         struct port *port = &mode_8023ad_ports[slave_id];
932         struct port_params initial = {
933                         .system = { { 0 } },
934                         .system_priority = rte_cpu_to_be_16(0xFFFF),
935                         .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
936                         .port_priority = rte_cpu_to_be_16(0x00FF),
937                         .port_number = 0,
938         };
939
940         char mem_name[RTE_ETH_NAME_MAX_LEN];
941         int socket_id;
942         unsigned element_size;
943         uint32_t total_tx_desc;
944         struct bond_tx_queue *bd_tx_q;
945         uint16_t q_id;
946
947         /* Given slave mus not be in active list */
948         RTE_ASSERT(find_slave_by_id(internals->active_slaves,
949         internals->active_slave_count, slave_id) == internals->active_slave_count);
950         RTE_SET_USED(internals); /* used only for assert when enabled */
951
952         memcpy(&port->actor, &initial, sizeof(struct port_params));
953         /* Standard requires that port ID must be grater than 0.
954          * Add 1 do get corresponding port_number */
955         port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);
956
957         memcpy(&port->partner, &initial, sizeof(struct port_params));
958
959         /* default states */
960         port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
961         port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION;
962         port->sm_flags = SM_FLAGS_BEGIN;
963
964         /* use this port as agregator */
965         port->aggregator_port_id = slave_id;
966         rte_eth_promiscuous_enable(slave_id);
967
968         timer_cancel(&port->warning_timer);
969
970         if (port->mbuf_pool != NULL)
971                 return;
972
973         RTE_ASSERT(port->rx_ring == NULL);
974         RTE_ASSERT(port->tx_ring == NULL);
975
976         socket_id = rte_eth_dev_socket_id(slave_id);
977         if (socket_id == (int)LCORE_ID_ANY)
978                 socket_id = rte_socket_id();
979
980         element_size = sizeof(struct slow_protocol_frame) +
981                                 RTE_PKTMBUF_HEADROOM;
982
983         /* The size of the mempool should be at least:
984          * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
985         total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
986         for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
987                 bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
988                 total_tx_desc += bd_tx_q->nb_tx_desc;
989         }
990
991         snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
992         port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc,
993                 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
994                         32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
995                 0, element_size, socket_id);
996
997         /* Any memory allocation failure in initialization is critical because
998          * resources can't be free, so reinitialization is impossible. */
999         if (port->mbuf_pool == NULL) {
1000                 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1001                         slave_id, mem_name, rte_strerror(rte_errno));
1002         }
1003
1004         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
1005         port->rx_ring = rte_ring_create(mem_name,
1006                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
1007
1008         if (port->rx_ring == NULL) {
1009                 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
1010                         mem_name, rte_strerror(rte_errno));
1011         }
1012
1013         /* TX ring is at least one pkt longer to make room for marker packet. */
1014         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
1015         port->tx_ring = rte_ring_create(mem_name,
1016                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
1017
1018         if (port->tx_ring == NULL) {
1019                 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
1020                         mem_name, rte_strerror(rte_errno));
1021         }
1022 }
1023
1024 int
1025 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
1026                 uint16_t slave_id)
1027 {
1028         void *pkt = NULL;
1029         struct port *port = NULL;
1030         uint8_t old_partner_state;
1031
1032         port = &mode_8023ad_ports[slave_id];
1033
1034         ACTOR_STATE_CLR(port, AGGREGATION);
1035         port->selected = UNSELECTED;
1036
1037         old_partner_state = port->partner_state;
1038         record_default(port);
1039
1040         /* If partner timeout state changes then disable timer */
1041         if (!((old_partner_state ^ port->partner_state) &
1042                         STATE_LACP_SHORT_TIMEOUT))
1043                 timer_cancel(&port->current_while_timer);
1044
1045         PARTNER_STATE_CLR(port, AGGREGATION);
1046         ACTOR_STATE_CLR(port, EXPIRED);
1047
1048         /* flush rx/tx rings */
1049         while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
1050                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1051
1052         while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
1053                         rte_pktmbuf_free((struct rte_mbuf *)pkt);
1054         return 0;
1055 }
1056
1057 void
1058 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
1059 {
1060         struct bond_dev_private *internals = bond_dev->data->dev_private;
1061         struct ether_addr slave_addr;
1062         struct port *slave, *agg_slave;
1063         uint16_t slave_id, i, j;
1064
1065         bond_mode_8023ad_stop(bond_dev);
1066
1067         for (i = 0; i < internals->active_slave_count; i++) {
1068                 slave_id = internals->active_slaves[i];
1069                 slave = &mode_8023ad_ports[slave_id];
1070                 rte_eth_macaddr_get(slave_id, &slave_addr);
1071
1072                 if (is_same_ether_addr(&slave_addr, &slave->actor.system))
1073                         continue;
1074
1075                 ether_addr_copy(&slave_addr, &slave->actor.system);
1076                 /* Do nothing if this port is not an aggregator. In other case
1077                  * Set NTT flag on every port that use this aggregator. */
1078                 if (slave->aggregator_port_id != slave_id)
1079                         continue;
1080
1081                 for (j = 0; j < internals->active_slave_count; j++) {
1082                         agg_slave = &mode_8023ad_ports[internals->active_slaves[j]];
1083                         if (agg_slave->aggregator_port_id == slave_id)
1084                                 SM_FLAG_SET(agg_slave, NTT);
1085                 }
1086         }
1087
1088         if (bond_dev->data->dev_started)
1089                 bond_mode_8023ad_start(bond_dev);
1090 }
1091
1092 static void
1093 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1094                 struct rte_eth_bond_8023ad_conf *conf)
1095 {
1096         struct bond_dev_private *internals = dev->data->dev_private;
1097         struct mode8023ad_private *mode4 = &internals->mode4;
1098         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1099
1100         conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1101         conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1102         conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1103         conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1104         conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1105         conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1106         conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1107         conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
1108         conf->slowrx_cb = mode4->slowrx_cb;
1109         conf->agg_selection = mode4->agg_selection;
1110 }
1111
1112 static void
1113 bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf)
1114 {
1115         conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1116         conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1117         conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1118         conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1119         conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1120         conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1121         conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1122         conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1123         conf->slowrx_cb = NULL;
1124         conf->agg_selection = AGG_STABLE;
1125 }
1126
1127 static void
1128 bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
1129                 struct rte_eth_bond_8023ad_conf *conf)
1130 {
1131         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1132
1133         mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1134         mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1135         mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1136         mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1137         mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1138         mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1139         mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1140         mode4->update_timeout_us = conf->update_timeout_ms * 1000;
1141
1142         mode4->dedicated_queues.enabled = 0;
1143         mode4->dedicated_queues.rx_qid = UINT16_MAX;
1144         mode4->dedicated_queues.tx_qid = UINT16_MAX;
1145 }
1146
1147 void
1148 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1149                 struct rte_eth_bond_8023ad_conf *conf)
1150 {
1151         struct rte_eth_bond_8023ad_conf def_conf;
1152         struct bond_dev_private *internals = dev->data->dev_private;
1153         struct mode8023ad_private *mode4 = &internals->mode4;
1154
1155         if (conf == NULL) {
1156                 conf = &def_conf;
1157                 bond_mode_8023ad_conf_get_default(conf);
1158         }
1159
1160         bond_mode_8023ad_stop(dev);
1161         bond_mode_8023ad_conf_assign(mode4, conf);
1162         mode4->slowrx_cb = conf->slowrx_cb;
1163         mode4->agg_selection = AGG_STABLE;
1164
1165         if (dev->data->dev_started)
1166                 bond_mode_8023ad_start(dev);
1167 }
1168
1169 int
1170 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1171 {
1172         struct bond_dev_private *internals = bond_dev->data->dev_private;
1173         uint8_t i;
1174
1175         for (i = 0; i < internals->active_slave_count; i++)
1176                 bond_mode_8023ad_activate_slave(bond_dev,
1177                                 internals->active_slaves[i]);
1178
1179         return 0;
1180 }
1181
1182 int
1183 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1184 {
1185         struct bond_dev_private *internals = bond_dev->data->dev_private;
1186         struct mode8023ad_private *mode4 = &internals->mode4;
1187         static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
1188
1189         if (mode4->slowrx_cb)
1190                 return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb,
1191                                          bond_dev);
1192
1193         return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev);
1194 }
1195
1196 void
1197 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1198 {
1199         struct bond_dev_private *internals = bond_dev->data->dev_private;
1200         struct mode8023ad_private *mode4 = &internals->mode4;
1201
1202         if (mode4->slowrx_cb) {
1203                 rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb,
1204                                      bond_dev);
1205                 return;
1206         }
1207         rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
1208 }
1209
1210 void
1211 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1212                                   uint16_t slave_id, struct rte_mbuf *pkt)
1213 {
1214         struct mode8023ad_private *mode4 = &internals->mode4;
1215         struct port *port = &mode_8023ad_ports[slave_id];
1216         struct marker_header *m_hdr;
1217         uint64_t marker_timer, old_marker_timer;
1218         int retval;
1219         uint8_t wrn, subtype;
1220         /* If packet is a marker, we send response now by reusing given packet
1221          * and update only source MAC, destination MAC is multicast so don't
1222          * update it. Other frames will be handled later by state machines */
1223         subtype = rte_pktmbuf_mtod(pkt,
1224                         struct slow_protocol_frame *)->slow_protocol.subtype;
1225
1226         if (subtype == SLOW_SUBTYPE_MARKER) {
1227                 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1228
1229                 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1230                         wrn = WRN_UNKNOWN_MARKER_TYPE;
1231                         goto free_out;
1232                 }
1233
1234                 /* Setup marker timer. Do it in loop in case concurrent access. */
1235                 do {
1236                         old_marker_timer = port->rx_marker_timer;
1237                         if (!timer_is_expired(&old_marker_timer)) {
1238                                 wrn = WRN_RX_MARKER_TO_FAST;
1239                                 goto free_out;
1240                         }
1241
1242                         timer_set(&marker_timer, mode4->rx_marker_timeout);
1243                         retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1244                                 old_marker_timer, marker_timer);
1245                 } while (unlikely(retval == 0));
1246
1247                 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1248                 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1249
1250                 if (internals->mode4.dedicated_queues.enabled == 0) {
1251                         int retval = rte_ring_enqueue(port->tx_ring, pkt);
1252                         if (retval != 0) {
1253                                 /* reset timer */
1254                                 port->rx_marker_timer = 0;
1255                                 wrn = WRN_TX_QUEUE_FULL;
1256                                 goto free_out;
1257                         }
1258                 } else {
1259                         /* Send packet directly to the slow queue */
1260                         uint16_t tx_count = rte_eth_tx_burst(slave_id,
1261                                         internals->mode4.dedicated_queues.tx_qid,
1262                                         &pkt, 1);
1263                         if (tx_count != 1) {
1264                                 /* reset timer */
1265                                 port->rx_marker_timer = 0;
1266                                 wrn = WRN_TX_QUEUE_FULL;
1267                                 goto free_out;
1268                         }
1269                 }
1270         } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1271                 if (internals->mode4.dedicated_queues.enabled == 0) {
1272                         int retval = rte_ring_enqueue(port->rx_ring, pkt);
1273                         if (retval != 0) {
1274                                 /* If RX fing full free lacpdu message and drop packet */
1275                                 wrn = WRN_RX_QUEUE_FULL;
1276                                 goto free_out;
1277                         }
1278                 } else
1279                         rx_machine_update(internals, slave_id, pkt);
1280         } else {
1281                 wrn = WRN_UNKNOWN_SLOW_TYPE;
1282                 goto free_out;
1283         }
1284
1285         return;
1286
1287 free_out:
1288         set_warning_flags(port, wrn);
1289         rte_pktmbuf_free(pkt);
1290 }
1291
1292 int
1293 rte_eth_bond_8023ad_conf_get(uint16_t port_id,
1294                 struct rte_eth_bond_8023ad_conf *conf)
1295 {
1296         struct rte_eth_dev *bond_dev;
1297
1298         if (valid_bonded_port_id(port_id) != 0)
1299                 return -EINVAL;
1300
1301         if (conf == NULL)
1302                 return -EINVAL;
1303
1304         bond_dev = &rte_eth_devices[port_id];
1305         bond_mode_8023ad_conf_get(bond_dev, conf);
1306         return 0;
1307 }
1308
1309 int
1310 rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
1311                 enum rte_bond_8023ad_agg_selection agg_selection)
1312 {
1313         struct rte_eth_dev *bond_dev;
1314         struct bond_dev_private *internals;
1315         struct mode8023ad_private *mode4;
1316
1317         bond_dev = &rte_eth_devices[port_id];
1318         internals = bond_dev->data->dev_private;
1319
1320         if (valid_bonded_port_id(port_id) != 0)
1321                 return -EINVAL;
1322         if (internals->mode != 4)
1323                 return -EINVAL;
1324
1325         mode4 = &internals->mode4;
1326         if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH
1327                         || agg_selection == AGG_STABLE)
1328                 mode4->agg_selection = agg_selection;
1329         return 0;
1330 }
1331
1332 int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
1333 {
1334         struct rte_eth_dev *bond_dev;
1335         struct bond_dev_private *internals;
1336         struct mode8023ad_private *mode4;
1337
1338         bond_dev = &rte_eth_devices[port_id];
1339         internals = bond_dev->data->dev_private;
1340
1341         if (valid_bonded_port_id(port_id) != 0)
1342                 return -EINVAL;
1343         if (internals->mode != 4)
1344                 return -EINVAL;
1345         mode4 = &internals->mode4;
1346
1347         return mode4->agg_selection;
1348 }
1349
1350
1351
1352 static int
1353 bond_8023ad_setup_validate(uint16_t port_id,
1354                 struct rte_eth_bond_8023ad_conf *conf)
1355 {
1356         if (valid_bonded_port_id(port_id) != 0)
1357                 return -EINVAL;
1358
1359         if (conf != NULL) {
1360                 /* Basic sanity check */
1361                 if (conf->slow_periodic_ms == 0 ||
1362                                 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1363                                 conf->long_timeout_ms == 0 ||
1364                                 conf->short_timeout_ms >= conf->long_timeout_ms ||
1365                                 conf->aggregate_wait_timeout_ms == 0 ||
1366                                 conf->tx_period_ms == 0 ||
1367                                 conf->rx_marker_period_ms == 0 ||
1368                                 conf->update_timeout_ms == 0) {
1369                         RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
1370                         return -EINVAL;
1371                 }
1372         }
1373
1374         return 0;
1375 }
1376
1377
1378 int
1379 rte_eth_bond_8023ad_setup(uint16_t port_id,
1380                 struct rte_eth_bond_8023ad_conf *conf)
1381 {
1382         struct rte_eth_dev *bond_dev;
1383         int err;
1384
1385         err = bond_8023ad_setup_validate(port_id, conf);
1386         if (err != 0)
1387                 return err;
1388
1389         bond_dev = &rte_eth_devices[port_id];
1390         bond_mode_8023ad_setup(bond_dev, conf);
1391
1392         return 0;
1393 }
1394
1395
1396
1397
1398
1399 int
1400 rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
1401                 struct rte_eth_bond_8023ad_slave_info *info)
1402 {
1403         struct rte_eth_dev *bond_dev;
1404         struct bond_dev_private *internals;
1405         struct port *port;
1406
1407         if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1408                         rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1409                 return -EINVAL;
1410
1411         bond_dev = &rte_eth_devices[port_id];
1412
1413         internals = bond_dev->data->dev_private;
1414         if (find_slave_by_id(internals->active_slaves,
1415                         internals->active_slave_count, slave_id) ==
1416                                 internals->active_slave_count)
1417                 return -EINVAL;
1418
1419         port = &mode_8023ad_ports[slave_id];
1420         info->selected = port->selected;
1421
1422         info->actor_state = port->actor_state;
1423         rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1424
1425         info->partner_state = port->partner_state;
1426         rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1427
1428         info->agg_port_id = port->aggregator_port_id;
1429         return 0;
1430 }
1431
1432 static int
1433 bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
1434 {
1435         struct rte_eth_dev *bond_dev;
1436         struct bond_dev_private *internals;
1437         struct mode8023ad_private *mode4;
1438
1439         if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1440                 return -EINVAL;
1441
1442         bond_dev = &rte_eth_devices[port_id];
1443
1444         if (!bond_dev->data->dev_started)
1445                 return -EINVAL;
1446
1447         internals = bond_dev->data->dev_private;
1448         if (find_slave_by_id(internals->active_slaves,
1449                         internals->active_slave_count, slave_id) ==
1450                                 internals->active_slave_count)
1451                 return -EINVAL;
1452
1453         mode4 = &internals->mode4;
1454         if (mode4->slowrx_cb == NULL)
1455                 return -EINVAL;
1456
1457         return 0;
1458 }
1459
1460 int
1461 rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
1462                                 int enabled)
1463 {
1464         struct port *port;
1465         int res;
1466
1467         res = bond_8023ad_ext_validate(port_id, slave_id);
1468         if (res != 0)
1469                 return res;
1470
1471         port = &mode_8023ad_ports[slave_id];
1472
1473         if (enabled)
1474                 ACTOR_STATE_SET(port, COLLECTING);
1475         else
1476                 ACTOR_STATE_CLR(port, COLLECTING);
1477
1478         return 0;
1479 }
1480
1481 int
1482 rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
1483                                 int enabled)
1484 {
1485         struct port *port;
1486         int res;
1487
1488         res = bond_8023ad_ext_validate(port_id, slave_id);
1489         if (res != 0)
1490                 return res;
1491
1492         port = &mode_8023ad_ports[slave_id];
1493
1494         if (enabled)
1495                 ACTOR_STATE_SET(port, DISTRIBUTING);
1496         else
1497                 ACTOR_STATE_CLR(port, DISTRIBUTING);
1498
1499         return 0;
1500 }
1501
1502 int
1503 rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
1504 {
1505         struct port *port;
1506         int err;
1507
1508         err = bond_8023ad_ext_validate(port_id, slave_id);
1509         if (err != 0)
1510                 return err;
1511
1512         port = &mode_8023ad_ports[slave_id];
1513         return ACTOR_STATE(port, DISTRIBUTING);
1514 }
1515
1516 int
1517 rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
1518 {
1519         struct port *port;
1520         int err;
1521
1522         err = bond_8023ad_ext_validate(port_id, slave_id);
1523         if (err != 0)
1524                 return err;
1525
1526         port = &mode_8023ad_ports[slave_id];
1527         return ACTOR_STATE(port, COLLECTING);
1528 }
1529
1530 int
1531 rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
1532                 struct rte_mbuf *lacp_pkt)
1533 {
1534         struct port *port;
1535         int res;
1536
1537         res = bond_8023ad_ext_validate(port_id, slave_id);
1538         if (res != 0)
1539                 return res;
1540
1541         port = &mode_8023ad_ports[slave_id];
1542
1543         if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
1544                 return -EINVAL;
1545
1546         struct lacpdu_header *lacp;
1547
1548         /* only enqueue LACPDUs */
1549         lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
1550         if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
1551                 return -EINVAL;
1552
1553         MODE4_DEBUG("sending LACP frame\n");
1554
1555         return rte_ring_enqueue(port->tx_ring, lacp_pkt);
1556 }
1557
1558 static void
1559 bond_mode_8023ad_ext_periodic_cb(void *arg)
1560 {
1561         struct rte_eth_dev *bond_dev = arg;
1562         struct bond_dev_private *internals = bond_dev->data->dev_private;
1563         struct mode8023ad_private *mode4 = &internals->mode4;
1564         struct port *port;
1565         void *pkt = NULL;
1566         uint16_t i, slave_id;
1567
1568         for (i = 0; i < internals->active_slave_count; i++) {
1569                 slave_id = internals->active_slaves[i];
1570                 port = &mode_8023ad_ports[slave_id];
1571
1572                 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
1573                         struct rte_mbuf *lacp_pkt = pkt;
1574                         struct lacpdu_header *lacp;
1575
1576                         lacp = rte_pktmbuf_mtod(lacp_pkt,
1577                                                 struct lacpdu_header *);
1578                         RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
1579
1580                         /* This is LACP frame so pass it to rx callback.
1581                          * Callback is responsible for freeing mbuf.
1582                          */
1583                         mode4->slowrx_cb(slave_id, lacp_pkt);
1584                 }
1585         }
1586
1587         rte_eal_alarm_set(internals->mode4.update_timeout_us,
1588                         bond_mode_8023ad_ext_periodic_cb, arg);
1589 }
1590
1591 int
1592 rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
1593 {
1594         int retval = 0;
1595         struct rte_eth_dev *dev = &rte_eth_devices[port];
1596         struct bond_dev_private *internals = (struct bond_dev_private *)
1597                 dev->data->dev_private;
1598
1599         if (check_for_bonded_ethdev(dev) != 0)
1600                 return -1;
1601
1602         if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0)
1603                 return -1;
1604
1605         /* Device must be stopped to set up slow queue */
1606         if (dev->data->dev_started)
1607                 return -1;
1608
1609         internals->mode4.dedicated_queues.enabled = 1;
1610
1611         bond_ethdev_mode_set(dev, internals->mode);
1612         return retval;
1613 }
1614
1615 int
1616 rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
1617 {
1618         int retval = 0;
1619         struct rte_eth_dev *dev = &rte_eth_devices[port];
1620         struct bond_dev_private *internals = (struct bond_dev_private *)
1621                 dev->data->dev_private;
1622
1623         if (check_for_bonded_ethdev(dev) != 0)
1624                 return -1;
1625
1626         /* Device must be stopped to set up slow queue */
1627         if (dev->data->dev_started)
1628                 return -1;
1629
1630         internals->mode4.dedicated_queues.enabled = 0;
1631
1632         bond_ethdev_mode_set(dev, internals->mode);
1633
1634         return retval;
1635 }