[deb_dpdk.git] / drivers / net / bonding / rte_eth_bond_8023ad.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stddef.h>
35 #include <string.h>
36 #include <stdbool.h>
37
38 #include <rte_alarm.h>
39 #include <rte_malloc.h>
40 #include <rte_errno.h>
41 #include <rte_cycles.h>
42 #include <rte_compat.h>
43
44 #include "rte_eth_bond_private.h"
45
46 static void bond_mode_8023ad_ext_periodic_cb(void *arg);
47 #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD
48 #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \
49                         bond_dbg_get_time_diff_ms(), slave_id, \
50                         __func__, ##__VA_ARGS__)
51
52 static uint64_t start_time;
53
54 static unsigned
55 bond_dbg_get_time_diff_ms(void)
56 {
57         uint64_t now;
58
59         now = rte_rdtsc();
60         if (start_time == 0)
61                 start_time = now;
62
63         return ((now - start_time) * 1000) / rte_get_tsc_hz();
64 }
65
66 static void
67 bond_print_lacp(struct lacpdu *l)
68 {
69         char a_address[18];
70         char p_address[18];
71         char a_state[256] = { 0 };
72         char p_state[256] = { 0 };
73
74         static const char * const state_labels[] = {
75                 "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP"
76         };
77
78         int a_len = 0;
79         int p_len = 0;
80         uint8_t i;
81         uint8_t *addr;
82
83         addr = l->actor.port_params.system.addr_bytes;
84         snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X",
85                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
86
87         addr = l->partner.port_params.system.addr_bytes;
88         snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X",
89                 addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
90
91         for (i = 0; i < 8; i++) {
92                 if ((l->actor.state >> i) & 1) {
93                         a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ",
94                                 state_labels[i]);
95                 }
96
97                 if ((l->partner.state >> i) & 1) {
98                         p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ",
99                                 state_labels[i]);
100                 }
101         }
102
103         if (a_len && a_state[a_len-1] == ' ')
104                 a_state[a_len-1] = '\0';
105
106         if (p_len && p_state[p_len-1] == ' ')
107                 p_state[p_len-1] = '\0';
108
109         RTE_LOG(DEBUG, PMD, "LACP: {\n"\
110                         "  subtype= %02X\n"\
111                         "  ver_num=%02X\n"\
112                         "  actor={ tlv=%02X, len=%02X\n"\
113                         "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
114                         "       state={ %s }\n"\
115                         "  }\n"\
116                         "  partner={ tlv=%02X, len=%02X\n"\
117                         "    pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n"\
118                         "       state={ %s }\n"\
119                         "  }\n"\
120                         "  collector={info=%02X, length=%02X, max_delay=%04X\n, " \
121                                                         "type_term=%02X, terminator_length = %02X}\n",\
122                         l->subtype,\
123                         l->version_number,\
124                         l->actor.tlv_type_info,\
125                         l->actor.info_length,\
126                         l->actor.port_params.system_priority,\
127                         a_address,\
128                         l->actor.port_params.key,\
129                         l->actor.port_params.port_priority,\
130                         l->actor.port_params.port_number,\
131                         a_state,\
132                         l->partner.tlv_type_info,\
133                         l->partner.info_length,\
134                         l->partner.port_params.system_priority,\
135                         p_address,\
136                         l->partner.port_params.key,\
137                         l->partner.port_params.port_priority,\
138                         l->partner.port_params.port_number,\
139                         p_state,\
140                         l->tlv_type_collector_info,\
141                         l->collector_info_length,\
142                         l->collector_max_delay,\
143                         l->tlv_type_terminator,\
144                         l->terminator_length);
145
146 }
147 #define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu)
148 #else
149 #define BOND_PRINT_LACP(lacpdu) do { } while (0)
150 #define MODE4_DEBUG(fmt, ...) do { } while (0)
151 #endif
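/*
 * Note: the MODE4_DEBUG()/BOND_PRINT_LACP() tracing above is compiled in only
 * when RTE_LIBRTE_BOND_DEBUG_8023AD is defined at build time; otherwise both
 * macros expand to empty statements.
 */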
152
153 static const struct ether_addr lacp_mac_addr = {
154         .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
155 };
156
157 struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
158
159 static void
160 timer_cancel(uint64_t *timer)
161 {
162         *timer = 0;
163 }
164
165 static void
166 timer_set(uint64_t *timer, uint64_t timeout)
167 {
168         *timer = rte_rdtsc() + timeout;
169 }
170
171 /* Forces given timer to be in expired state. */
172 static void
173 timer_force_expired(uint64_t *timer)
174 {
175         *timer = rte_rdtsc();
176 }
177
178 static bool
179 timer_is_stopped(uint64_t *timer)
180 {
181         return *timer == 0;
182 }
183
184 static bool
185 timer_is_expired(uint64_t *timer)
186 {
187         return *timer < rte_rdtsc();
188 }
189
190 /* Timer is in running state if it is neither stopped nor expired */
191 static bool
192 timer_is_running(uint64_t *timer)
193 {
194         return !timer_is_stopped(timer) && !timer_is_expired(timer);
195 }
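/*
 * Illustrative sketch (not part of the upstream driver): the helpers above
 * implement one-shot timers measured in TSC ticks, with the value 0 meaning
 * "stopped".  A hypothetical caller ("my_timer" and "handle_timeout" are
 * made-up names) would use them roughly as follows:
 *
 *	uint64_t my_timer;
 *
 *	timer_set(&my_timer, 100 * rte_get_tsc_hz() / 1000);  // arm for ~100 ms
 *	if (timer_is_expired(&my_timer))
 *		handle_timeout();
 *	timer_cancel(&my_timer);                               // back to "stopped"
 */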
196
197 static void
198 set_warning_flags(struct port *port, uint16_t flags)
199 {
200         int retval;
201         uint16_t old;
202         uint16_t new_flag = 0;
203
204         do {
205                 old = port->warnings_to_show;
206                 new_flag = old | flags;
207                 retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag);
208         } while (unlikely(retval == 0));
209 }
210
211 static void
212 show_warnings(uint16_t slave_id)
213 {
214         struct port *port = &mode_8023ad_ports[slave_id];
215         uint8_t warnings;
216
217         do {
218                 warnings = port->warnings_to_show;
219         } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0);
220
221         if (!warnings)
222                 return;
223
224         if (!timer_is_expired(&port->warning_timer))
225                 return;
226
227
228         timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS *
229                         rte_get_tsc_hz() / 1000);
230
231         if (warnings & WRN_RX_QUEUE_FULL) {
232                 RTE_LOG(DEBUG, PMD,
233                         "Slave %u: failed to enqueue LACP packet into RX ring.\n"
234                         "Receive and transmit functions must be invoked on bonded\n"
235                         "interface at least 10 times per second or LACP will not\n"
236                         "work correctly\n", slave_id);
237         }
238
239         if (warnings & WRN_TX_QUEUE_FULL) {
240                 RTE_LOG(DEBUG, PMD,
241                         "Slave %u: failed to enqueue LACP packet into TX ring.\n"
242                         "Receive and transmit functions must be invoked on bonded\n"
243                         "interface at least 10 times per second or LACP will not\n"
244                         "work correctly\n", slave_id);
245         }
246
247         if (warnings & WRN_RX_MARKER_TO_FAST)
248                 RTE_LOG(INFO, PMD, "Slave %u: marker too early - ignoring.\n", slave_id);
249
250         if (warnings & WRN_UNKNOWN_SLOW_TYPE) {
251                 RTE_LOG(INFO, PMD,
252                         "Slave %u: ignoring unknown slow protocol frame type", slave_id);
253         }
254
255         if (warnings & WRN_UNKNOWN_MARKER_TYPE)
256                 RTE_LOG(INFO, PMD, "Slave %u: ignoring unknown marker type", slave_id);
257
258         if (warnings & WRN_NOT_LACP_CAPABLE)
259                 MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id);
260 }
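/*
 * Illustrative sketch (an assumption, not upstream code): the warnings above
 * imply that the application has to keep polling the bonded port so that the
 * LACP control traffic queued on the internal rings is actually moved; the
 * log text suggests at least ~10 polls per second.  A minimal polling loop
 * could look like this ("bond_port_id" is a hypothetical variable):
 *
 *	struct rte_mbuf *bufs[32];
 *	uint16_t nb_rx, i;
 *
 *	for (;;) {
 *		nb_rx = rte_eth_rx_burst(bond_port_id, 0, bufs, 32);
 *		// ... hand data packets to the application here ...
 *		for (i = 0; i < nb_rx; i++)
 *			rte_pktmbuf_free(bufs[i]);
 *		// the TX path must be exercised as well so that queued LACPDUs
 *		// are drained, e.g. via rte_eth_tx_burst(bond_port_id, 0, ...).
 *	}
 */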
261
262 static void
263 record_default(struct port *port)
264 {
265         /* Record default parameters for the partner. Partner admin parameters
266          * are not implemented, so set them to an arbitrary default (last known)
267          * and mark in the actor state that the partner is defaulted. */
268         port->partner_state = STATE_LACP_ACTIVE;
269         ACTOR_STATE_SET(port, DEFAULTED);
270 }
271
272 /** Function handles the receive state machine.
273  *
274  * This function implements the Receive State Machine from point 5.4.12 in
275  * the 802.1AX documentation. It should be called periodically.
276  * @param internals             Bonding device private data.
277  * @param slave_id              Slave port on which the LACPDU was received.
278  * @param lacp                  LACPDU received, or NULL if none was received.
279  */
280 static void
281 rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
282                 struct lacpdu *lacp)
283 {
284         struct port *agg, *port = &mode_8023ad_ports[slave_id];
285         uint64_t timeout;
286
287         if (SM_FLAG(port, BEGIN)) {
288                 /* Initialize the port state (BEGIN condition) */
289                 MODE4_DEBUG("-> INITIALIZE\n");
290                 SM_FLAG_CLR(port, MOVED);
291                 port->selected = UNSELECTED;
292
293                 record_default(port);
294
295                 ACTOR_STATE_CLR(port, EXPIRED);
296                 timer_cancel(&port->current_while_timer);
297
298                 /* DISABLED: On initialization partner is out of sync */
299                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
300
301                 /* LACP_DISABLED handling if LACP is not enabled on this port */
302                 if (!SM_FLAG(port, LACP_ENABLED))
303                         PARTNER_STATE_CLR(port, AGGREGATION);
304                 else
305                         PARTNER_STATE_SET(port, AGGREGATION);
306         }
307
308         if (!SM_FLAG(port, LACP_ENABLED)) {
309                 /* Update parameters only if state changed */
310                 if (!timer_is_stopped(&port->current_while_timer)) {
311                         port->selected = UNSELECTED;
312                         record_default(port);
313                         PARTNER_STATE_CLR(port, AGGREGATION);
314                         ACTOR_STATE_CLR(port, EXPIRED);
315                         timer_cancel(&port->current_while_timer);
316                 }
317                 return;
318         }
319
320         if (lacp) {
321                 MODE4_DEBUG("LACP -> CURRENT\n");
322                 BOND_PRINT_LACP(lacp);
323                 /* Update selected flag. If partner parameters are defaulted, assume
324                  * they match. If not defaulted, compare the LACPDU actor params with
325                  * this port's partner params. */
326                 if (!ACTOR_STATE(port, DEFAULTED) &&
327                         (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION)
328                         || memcmp(&port->partner, &lacp->actor.port_params,
329                                 sizeof(port->partner)) != 0)) {
330                         MODE4_DEBUG("selected <- UNSELECTED\n");
331                         port->selected = UNSELECTED;
332                 }
333
334                 /* Record this PDU actor params as partner params */
335                 memcpy(&port->partner, &lacp->actor.port_params,
336                         sizeof(struct port_params));
337                 port->partner_state = lacp->actor.state;
338
339                 /* Partner parameters are not defaulted any more */
340                 ACTOR_STATE_CLR(port, DEFAULTED);
341
342                 /* Check if the LACPDU partner params match this port's actor params */
343                 agg = &mode_8023ad_ports[port->aggregator_port_id];
344                 bool match = port->actor.system_priority ==
345                         lacp->partner.port_params.system_priority &&
346                         is_same_ether_addr(&agg->actor.system,
347                         &lacp->partner.port_params.system) &&
348                         port->actor.port_priority ==
349                         lacp->partner.port_params.port_priority &&
350                         port->actor.port_number ==
351                         lacp->partner.port_params.port_number;
352
353                 /* Update NTT if the partner's information is outdated (XORed and
354                  * masked bits are set) */
355                 uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT |
356                         STATE_SYNCHRONIZATION | STATE_AGGREGATION;
357
358                 if (((port->actor_state ^ lacp->partner.state) & state_mask) ||
359                                 match == false) {
360                         SM_FLAG_SET(port, NTT);
361                 }
362
363                 /* If the LACPDU partner params match this port's actor params */
364                 if (match == true && ACTOR_STATE(port, AGGREGATION) ==
365                                 PARTNER_STATE(port,     AGGREGATION))
366                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
367                 else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port,
368                                 AGGREGATION))
369                         PARTNER_STATE_SET(port, SYNCHRONIZATION);
370                 else
371                         PARTNER_STATE_CLR(port, SYNCHRONIZATION);
372
373                 if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT))
374                         timeout = internals->mode4.short_timeout;
375                 else
376                         timeout = internals->mode4.long_timeout;
377
378                 timer_set(&port->current_while_timer, timeout);
379                 ACTOR_STATE_CLR(port, EXPIRED);
380                 return; /* No state change */
381         }
382
383         /* If the CURRENT state timer is not running (stopped or expired),
384          * transition to the EXPIRED state from DISABLED or CURRENT */
385         if (!timer_is_running(&port->current_while_timer)) {
386                 ACTOR_STATE_SET(port, EXPIRED);
387                 PARTNER_STATE_CLR(port, SYNCHRONIZATION);
388                 PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
389                 timer_set(&port->current_while_timer, internals->mode4.short_timeout);
390         }
391 }
392
393 /**
394  * Function handles the periodic transmission state machine.
395  *
396  * Function implements the Periodic Transmission state machine from point
397  * 5.4.13 in the 802.1AX documentation. It should be called periodically.
398  * @param internals             Bonding device private data.
399  * @param slave_id              Slave port to run the state machine for.
400  */
401 static void
402 periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
403 {
404         struct port *port = &mode_8023ad_ports[slave_id];
405         /* Check whether either side is running active LACP */
406         uint64_t timeout;
407         uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
408                 PARTNER_STATE(port, LACP_ACTIVE);
409
410         uint8_t is_partner_fast, was_partner_fast;
411         /* No periodic machine on BEGIN, when LACP is disabled, or when both sides are passive */
412         if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) {
413                 timer_cancel(&port->periodic_timer);
414                 timer_force_expired(&port->tx_machine_timer);
415                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
416
417                 MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n",
418                         SM_FLAG(port, BEGIN) ? "begin " : "",
419                         SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ",
420                         active ? "LACP active " : "LACP passive ");
421                 return;
422         }
423
424         is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT);
425         was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT);
426
427         /* If the periodic timer is not started, transition from NO PERIODIC to FAST/SLOW.
428          * Otherwise check if the timer expired or the partner's settings changed. */
429         if (!timer_is_stopped(&port->periodic_timer)) {
430                 if (timer_is_expired(&port->periodic_timer)) {
431                         SM_FLAG_SET(port, NTT);
432                 } else if (is_partner_fast != was_partner_fast) {
433                         /* Partner's timeout was slow and now it is fast -> send LACP.
434                          * In the other case (was fast and now is slow) just switch
435                          * the timeout to slow without forcing a LACP send (because the
436                          * standard says so) */
437                         if (is_partner_fast)
438                                 SM_FLAG_SET(port, NTT);
439                 } else
440                         return; /* Nothing changed */
441         }
442
443         /* Handle state transition to FAST/SLOW LACP timeout */
444         if (is_partner_fast) {
445                 timeout = internals->mode4.fast_periodic_timeout;
446                 SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT);
447         } else {
448                 timeout = internals->mode4.slow_periodic_timeout;
449                 SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT);
450         }
451
452         timer_set(&port->periodic_timer, timeout);
453 }
454
455 /**
456  * Function handles mux state machine.
457  *
458  * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation.
459  * It should be called periodically.
460  * @param internals             Bonding device private data.
461  * @param slave_id              Slave port to run the state machine for.
462  */
463 static void
464 mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
465 {
466         struct port *port = &mode_8023ad_ports[slave_id];
467
468         /* Mask of the mux-related bits in the actor state */
469         const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
470                 STATE_COLLECTING;
471
472         /* Enter DETACHED state on BEGIN condition or from any other state if
473          * port was unselected */
474         if (SM_FLAG(port, BEGIN) ||
475                         port->selected == UNSELECTED || (port->selected == STANDBY &&
476                                 (port->actor_state & state_mask) != 0)) {
477                 /* detach mux from aggregator */
478                 port->actor_state &= ~state_mask;
479                 /* Set NTT to true on the BEGIN condition or on a transition from any other
480                  * state, indicated by the wait_while_timer having been started */
481                 if (SM_FLAG(port, BEGIN) ||
482                                 !timer_is_stopped(&port->wait_while_timer)) {
483                         SM_FLAG_SET(port, NTT);
484                         MODE4_DEBUG("-> DETACHED\n");
485                 }
486                 timer_cancel(&port->wait_while_timer);
487         }
488
489         if (timer_is_stopped(&port->wait_while_timer)) {
490                 if (port->selected == SELECTED || port->selected == STANDBY) {
491                         timer_set(&port->wait_while_timer,
492                                 internals->mode4.aggregate_wait_timeout);
493
494                         MODE4_DEBUG("DETACHED -> WAITING\n");
495                 }
496                 /* Waiting state entered */
497                 return;
498         }
499
500         /* Transition to the next state if the port is ready */
501         if (!timer_is_expired(&port->wait_while_timer))
502                 return;
503
504         if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) &&
505                 !PARTNER_STATE(port, SYNCHRONIZATION)) {
506                 /* If in COLLECTING or DISTRIBUTING state and the partner becomes out of
507                  * sync, transition to the ATTACHED state. */
508                 ACTOR_STATE_CLR(port, DISTRIBUTING);
509                 ACTOR_STATE_CLR(port, COLLECTING);
510                 /* Clear actor sync to trigger the transition to ATTACHED in the condition below */
511                 ACTOR_STATE_CLR(port, SYNCHRONIZATION);
512                 MODE4_DEBUG("Out of sync -> ATTACHED\n");
513         }
514
515         if (!ACTOR_STATE(port, SYNCHRONIZATION)) {
516                 /* attach mux to aggregator */
517                 RTE_ASSERT((port->actor_state & (STATE_COLLECTING |
518                         STATE_DISTRIBUTING)) == 0);
519
520                 ACTOR_STATE_SET(port, SYNCHRONIZATION);
521                 SM_FLAG_SET(port, NTT);
522                 MODE4_DEBUG("ATTACHED Entered\n");
523         } else if (!ACTOR_STATE(port, COLLECTING)) {
524                 /* Start collecting if in sync */
525                 if (PARTNER_STATE(port, SYNCHRONIZATION)) {
526                         MODE4_DEBUG("ATTACHED -> COLLECTING\n");
527                         ACTOR_STATE_SET(port, COLLECTING);
528                         SM_FLAG_SET(port, NTT);
529                 }
530         } else if (ACTOR_STATE(port, COLLECTING)) {
531                 /* Check if partner is in COLLECTING state. If so this port can
532                  * distribute frames to it */
533                 if (!ACTOR_STATE(port, DISTRIBUTING)) {
534                         if (PARTNER_STATE(port, COLLECTING)) {
535                                 /* Enable DISTRIBUTING if the partner is collecting */
536                                 ACTOR_STATE_SET(port, DISTRIBUTING);
537                                 SM_FLAG_SET(port, NTT);
538                                 MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n");
539                                 RTE_LOG(INFO, PMD,
540                                         "Bond %u: slave id %u distributing started.\n",
541                                         internals->port_id, slave_id);
542                         }
543                 } else {
544                         if (!PARTNER_STATE(port, COLLECTING)) {
545                                 /* Disable DISTRIBUTING (enter COLLECTING state) if partner
546                                  * is not collecting */
547                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
548                                 SM_FLAG_SET(port, NTT);
549                                 MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n");
550                                 RTE_LOG(INFO, PMD,
551                                         "Bond %u: slave id %u distributing stopped.\n",
552                                         internals->port_id, slave_id);
553                         }
554                 }
555         }
556 }
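/*
 * Summary (a sketch of the function above): on a SELECTED/STANDBY port the mux
 * machine walks DETACHED -> WAITING -> ATTACHED -> COLLECTING -> DISTRIBUTING,
 * and falls back towards COLLECTING/ATTACHED whenever the partner stops
 * collecting or drops out of sync.
 */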
557
558 /**
559  * Function handles the transmit state machine.
560  *
561  * Function implements the Transmit Machine from point 5.4.16 in the 802.1AX
562  * documentation.
563  * @param internals             Bonding device private data.
564  * @param slave_id              Slave port to run the state machine for.
565  */
566 static void
567 tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
568 {
569         struct port *agg, *port = &mode_8023ad_ports[slave_id];
570
571         struct rte_mbuf *lacp_pkt = NULL;
572         struct lacpdu_header *hdr;
573         struct lacpdu *lacpdu;
574
575         /* If the periodic timer is not running, the periodic machine is in the
576          * NO PERIODIC state and, according to the 802.1AX standard, the tx machine
577          * should not transmit any frames and should set NTT to false. */
578         if (timer_is_stopped(&port->periodic_timer))
579                 SM_FLAG_CLR(port, NTT);
580
581         if (!SM_FLAG(port, NTT))
582                 return;
583
584         if (!timer_is_expired(&port->tx_machine_timer))
585                 return;
586
587         lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool);
588         if (lacp_pkt == NULL) {
589                 RTE_LOG(ERR, PMD, "Failed to allocate LACP packet from pool\n");
590                 return;
591         }
592
593         lacp_pkt->data_len = sizeof(*hdr);
594         lacp_pkt->pkt_len = sizeof(*hdr);
595
596         hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
597
598         /* Source and destination MAC */
599         ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr);
600         rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr);
601         hdr->eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_SLOW);
602
603         lacpdu = &hdr->lacpdu;
604         memset(lacpdu, 0, sizeof(*lacpdu));
605
606         /* Initialize LACP part */
607         lacpdu->subtype = SLOW_SUBTYPE_LACP;
608         lacpdu->version_number = 1;
609
610         /* ACTOR */
611         lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION;
612         lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
613         memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
614                         sizeof(port->actor));
615         agg = &mode_8023ad_ports[port->aggregator_port_id];
616         ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
617         lacpdu->actor.state = port->actor_state;
618
619         /* PARTNER */
620         lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION;
621         lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params);
622         memcpy(&lacpdu->partner.port_params, &port->partner,
623                         sizeof(struct port_params));
624         lacpdu->partner.state = port->partner_state;
625
626         /* Other fields */
627         lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION;
628         lacpdu->collector_info_length = 0x10;
629         lacpdu->collector_max_delay = 0;
630
631         lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION;
632         lacpdu->terminator_length = 0;
633
634         MODE4_DEBUG("Sending LACP frame\n");
635         BOND_PRINT_LACP(lacpdu);
636
637         if (internals->mode4.dedicated_queues.enabled == 0) {
638                 int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt);
639                 if (retval != 0) {
640                         /* If the TX ring is full, free the mbuf and drop the packet.
641                            It will be retransmitted on the next function call. */
642                         rte_pktmbuf_free(lacp_pkt);
643                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
644                         return;
645                 }
646         } else {
647                 uint16_t pkts_sent = rte_eth_tx_burst(slave_id,
648                                 internals->mode4.dedicated_queues.tx_qid,
649                                 &lacp_pkt, 1);
650                 if (pkts_sent != 1) {
651                         rte_pktmbuf_free(lacp_pkt);
652                         set_warning_flags(port, WRN_TX_QUEUE_FULL);
653                         return;
654                 }
655         }
656
657
658         timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout);
659         SM_FLAG_CLR(port, NTT);
660 }
661
662 static uint8_t
663 max_index(uint64_t *a, int n)
664 {
665         if (n <= 0)
666                 return -1;
667
668         int i, max_i = 0;
669         uint64_t max = a[0];
670
671         for (i = 1; i < n; ++i) {
672                 if (a[i] > max) {
673                         max = a[i];
674                         max_i = i;
675                 }
676         }
677
678         return max_i;
679 }
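/*
 * Example: max_index() returns the index of the largest element, e.g. for
 * a[] = { 10, 40, 20 } and n = 3 it returns 1.  It is used by the AGG_COUNT
 * and AGG_BANDWIDTH aggregator selection modes below.
 */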
680
681 /**
682  * Function assigns port to aggregator.
683  *
684  * @param internals             Pointer to the bond_dev_private structure.
685  * @param slave_id              Slave port to assign to an aggregator.
686  */
687 static void
688 selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
689 {
690         struct port *agg, *port;
691         uint16_t slaves_count, new_agg_id, i, j = 0;
692         uint16_t *slaves;
693         uint64_t agg_bandwidth[8] = {0};
694         uint64_t agg_count[8] = {0};
695         uint16_t default_slave = 0;
696         uint8_t mode_count_id, mode_band_id;
697         struct rte_eth_link link_info;
698
699         slaves = internals->active_slaves;
700         slaves_count = internals->active_slave_count;
701         port = &mode_8023ad_ports[slave_id];
702
703         /* Search for aggregator suitable for this port */
704         for (i = 0; i < slaves_count; ++i) {
705                 agg = &mode_8023ad_ports[slaves[i]];
706                 /* Skip ports that are not aggregators */
707                 if (agg->aggregator_port_id != slaves[i])
708                         continue;
709
710                 agg_count[agg->aggregator_port_id] += 1;
711                 rte_eth_link_get_nowait(slaves[i], &link_info);
712                 agg_bandwidth[agg->aggregator_port_id] += link_info.link_speed;
713
714                 /* The actor's system ID is not checked since all slave devices have the
715                  * same ID (MAC address). */
716                 if ((agg->actor.key == port->actor.key &&
717                         agg->partner.system_priority == port->partner.system_priority &&
718                         is_same_ether_addr(&agg->partner.system, &port->partner.system) == 1
719                         && (agg->partner.key == port->partner.key)) &&
720                         is_zero_ether_addr(&port->partner.system) != 1 &&
721                         (agg->actor.key &
722                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) {
723
724                         if (j == 0)
725                                 default_slave = i;
726                         j++;
727                 }
728         }
729
730         switch (internals->mode4.agg_selection) {
731         case AGG_COUNT:
732                 mode_count_id = max_index(
733                                 (uint64_t *)agg_count, slaves_count);
734                 new_agg_id = mode_count_id;
735                 break;
736         case AGG_BANDWIDTH:
737                 mode_band_id = max_index(
738                                 (uint64_t *)agg_bandwidth, slaves_count);
739                 new_agg_id = mode_band_id;
740                 break;
741         case AGG_STABLE:
742                 if (default_slave == slaves_count)
743                         new_agg_id = slave_id;
744                 else
745                         new_agg_id = slaves[default_slave];
746                 break;
747         default:
748                 if (default_slave == slaves_count)
749                         new_agg_id = slave_id;
750                 else
751                         new_agg_id = slaves[default_slave];
752                 break;
753         }
754
755         if (new_agg_id != port->aggregator_port_id) {
756                 port->aggregator_port_id = new_agg_id;
757
758                 MODE4_DEBUG("-> SELECTED: ID=%3u\n"
759                         "\t%s aggregator ID=%3u\n",
760                         port->aggregator_port_id,
761                         port->aggregator_port_id == slave_id ?
762                                 "aggregator not found, using default" : "aggregator found",
763                         port->aggregator_port_id);
764         }
765
766         port->selected = SELECTED;
767 }
768
769 /* Function maps DPDK speed to bonding speed stored in key field */
770 static uint16_t
771 link_speed_key(uint16_t speed) {
772         uint16_t key_speed;
773
774         switch (speed) {
775         case ETH_SPEED_NUM_NONE:
776                 key_speed = 0x00;
777                 break;
778         case ETH_SPEED_NUM_10M:
779                 key_speed = BOND_LINK_SPEED_KEY_10M;
780                 break;
781         case ETH_SPEED_NUM_100M:
782                 key_speed = BOND_LINK_SPEED_KEY_100M;
783                 break;
784         case ETH_SPEED_NUM_1G:
785                 key_speed = BOND_LINK_SPEED_KEY_1000M;
786                 break;
787         case ETH_SPEED_NUM_10G:
788                 key_speed = BOND_LINK_SPEED_KEY_10G;
789                 break;
790         case ETH_SPEED_NUM_20G:
791                 key_speed = BOND_LINK_SPEED_KEY_20G;
792                 break;
793         case ETH_SPEED_NUM_40G:
794                 key_speed = BOND_LINK_SPEED_KEY_40G;
795                 break;
796         default:
797                 /* Unknown speed */
798                 key_speed = 0xFFFF;
799         }
800
801         return key_speed;
802 }
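/*
 * Worked example (illustrative sketch): the periodic callback below combines
 * this speed key with the duplex bit to form the actor key, stored in network
 * byte order.  For a 10G full-duplex link that is roughly:
 *
 *	uint16_t key = link_speed_key(ETH_SPEED_NUM_10G) << 1;
 *
 *	key |= BOND_LINK_FULL_DUPLEX_KEY;
 *	key = rte_cpu_to_be_16(key);	// value compared against port->actor.key
 */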
803
804 static void
805 rx_machine_update(struct bond_dev_private *internals, uint8_t slave_id,
806                 struct rte_mbuf *lacp_pkt) {
807         struct lacpdu_header *lacp;
808
809         if (lacp_pkt != NULL) {
810                 lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
811                 RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
812
813                 /* This is LACP frame so pass it to rx_machine */
814                 rx_machine(internals, slave_id, &lacp->lacpdu);
815                 rte_pktmbuf_free(lacp_pkt);
816         } else
817                 rx_machine(internals, slave_id, NULL);
818 }
819
820 static void
821 bond_mode_8023ad_periodic_cb(void *arg)
822 {
823         struct rte_eth_dev *bond_dev = arg;
824         struct bond_dev_private *internals = bond_dev->data->dev_private;
825         struct port *port;
826         struct rte_eth_link link_info;
827         struct ether_addr slave_addr;
828         struct rte_mbuf *lacp_pkt = NULL;
829
830         uint8_t i, slave_id;
831
832
833         /* Update link status on each port */
834         for (i = 0; i < internals->active_slave_count; i++) {
835                 uint16_t key;
836
837                 slave_id = internals->active_slaves[i];
838                 rte_eth_link_get_nowait(slave_id, &link_info);
839                 rte_eth_macaddr_get(slave_id, &slave_addr);
840
841                 if (link_info.link_status != 0) {
842                         key = link_speed_key(link_info.link_speed) << 1;
843                         if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX)
844                                 key |= BOND_LINK_FULL_DUPLEX_KEY;
845                 } else
846                         key = 0;
847
848                 port = &mode_8023ad_ports[slave_id];
849
850                 key = rte_cpu_to_be_16(key);
851                 if (key != port->actor.key) {
852                         if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)))
853                                 set_warning_flags(port, WRN_NOT_LACP_CAPABLE);
854
855                         port->actor.key = key;
856                         SM_FLAG_SET(port, NTT);
857                 }
858
859                 if (!is_same_ether_addr(&port->actor.system, &slave_addr)) {
860                         ether_addr_copy(&slave_addr, &port->actor.system);
861                         if (port->aggregator_port_id == slave_id)
862                                 SM_FLAG_SET(port, NTT);
863                 }
864         }
865
866         for (i = 0; i < internals->active_slave_count; i++) {
867                 slave_id = internals->active_slaves[i];
868                 port = &mode_8023ad_ports[slave_id];
869
870                 if ((port->actor.key &
871                                 rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
872
873                         SM_FLAG_SET(port, BEGIN);
874
875                         /* LACP is disabled on half duplex or when the link is down */
876                         if (SM_FLAG(port, LACP_ENABLED)) {
877                                 /* If port was enabled set it to BEGIN state */
878                                 SM_FLAG_CLR(port, LACP_ENABLED);
879                                 ACTOR_STATE_CLR(port, DISTRIBUTING);
880                                 ACTOR_STATE_CLR(port, COLLECTING);
881                         }
882
883                         /* Skip this port processing */
884                         continue;
885                 }
886
887                 SM_FLAG_SET(port, LACP_ENABLED);
888
889                 if (internals->mode4.dedicated_queues.enabled == 0) {
890                         /* Find an LACP packet for this port. Do not check the subtype;
891                          * that is done in the function that queued the packet
892                          */
893                         int retval = rte_ring_dequeue(port->rx_ring,
894                                         (void **)&lacp_pkt);
895
896                         if (retval != 0)
897                                 lacp_pkt = NULL;
898
899                         rx_machine_update(internals, slave_id, lacp_pkt);
900                 } else {
901                         uint16_t rx_count = rte_eth_rx_burst(slave_id,
902                                         internals->mode4.dedicated_queues.rx_qid,
903                                         &lacp_pkt, 1);
904
905                         if (rx_count == 1)
906                                 bond_mode_8023ad_handle_slow_pkt(internals,
907                                                 slave_id, lacp_pkt);
908                         else
909                                 rx_machine_update(internals, slave_id, NULL);
910                 }
911
912                 periodic_machine(internals, slave_id);
913                 mux_machine(internals, slave_id);
914                 tx_machine(internals, slave_id);
915                 selection_logic(internals, slave_id);
916
917                 SM_FLAG_CLR(port, BEGIN);
918                 show_warnings(slave_id);
919         }
920
921         rte_eal_alarm_set(internals->mode4.update_timeout_us,
922                         bond_mode_8023ad_periodic_cb, arg);
923 }
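/*
 * Note: the callback above re-arms itself via rte_eal_alarm_set() at the end
 * of every invocation, so all mode-4 state machines run once per
 * update_timeout_us microseconds until bond_mode_8023ad_stop() cancels the
 * alarm.
 */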
924
925 void
926 bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
927                                 uint16_t slave_id)
928 {
929         struct bond_dev_private *internals = bond_dev->data->dev_private;
930
931         struct port *port = &mode_8023ad_ports[slave_id];
932         struct port_params initial = {
933                         .system = { { 0 } },
934                         .system_priority = rte_cpu_to_be_16(0xFFFF),
935                         .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY),
936                         .port_priority = rte_cpu_to_be_16(0x00FF),
937                         .port_number = 0,
938         };
939
940         char mem_name[RTE_ETH_NAME_MAX_LEN];
941         int socket_id;
942         unsigned element_size;
943         uint32_t total_tx_desc;
944         struct bond_tx_queue *bd_tx_q;
945         uint16_t q_id;
946
947         /* Given slave must not be in the active list */
948         RTE_ASSERT(find_slave_by_id(internals->active_slaves,
949         internals->active_slave_count, slave_id) == internals->active_slave_count);
950         RTE_SET_USED(internals); /* used only for assert when enabled */
951
952         memcpy(&port->actor, &initial, sizeof(struct port_params));
953         /* The standard requires the port ID to be greater than 0.
954          * Add 1 to get the corresponding port_number */
955         port->actor.port_number = rte_cpu_to_be_16(slave_id + 1);
956
957         memcpy(&port->partner, &initial, sizeof(struct port_params));
958
959         /* default states */
960         port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED;
961         port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION;
962         port->sm_flags = SM_FLAGS_BEGIN;
963
964         /* use this port as aggregator */
965         port->aggregator_port_id = slave_id;
966         rte_eth_promiscuous_enable(slave_id);
967
968         timer_cancel(&port->warning_timer);
969
970         if (port->mbuf_pool != NULL)
971                 return;
972
973         RTE_ASSERT(port->rx_ring == NULL);
974         RTE_ASSERT(port->tx_ring == NULL);
975
976         socket_id = rte_eth_dev_socket_id(slave_id);
977         if (socket_id == (int)LCORE_ID_ANY)
978                 socket_id = rte_socket_id();
979
980         element_size = sizeof(struct slow_protocol_frame) +
981                                 RTE_PKTMBUF_HEADROOM;
982
983         /* The size of the mempool should be at least:
984          * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
985         total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
986         for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
987                 bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id];
988                 total_tx_desc += bd_tx_q->nb_tx_desc;
989         }
990
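        /*
         * Worked example (hypothetical numbers): with two TX queues of 512
         * descriptors each, total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS
         * + 2 * 512, so mbufs still held in the slave TX descriptor rings
         * cannot exhaust the pool.
         */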
991         snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id);
992         port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc,
993                 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
994                         32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
995                 0, element_size, socket_id);
996
997         /* Any memory allocation failure in initialization is critical because
998          * resources can't be freed, so reinitialization is impossible. */
999         if (port->mbuf_pool == NULL) {
1000                 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1001                         slave_id, mem_name, rte_strerror(rte_errno));
1002         }
1003
1004         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id);
1005         port->rx_ring = rte_ring_create(mem_name,
1006                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0);
1007
1008         if (port->rx_ring == NULL) {
1009                 rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id,
1010                         mem_name, rte_strerror(rte_errno));
1011         }
1012
1013         /* TX ring is at least one pkt longer to make room for marker packet. */
1014         snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id);
1015         port->tx_ring = rte_ring_create(mem_name,
1016                         rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0);
1017
1018         if (port->tx_ring == NULL) {
1019                 rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id,
1020                         mem_name, rte_strerror(rte_errno));
1021         }
1022 }
1023
1024 int
1025 bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
1026                 uint16_t slave_id)
1027 {
1028         void *pkt = NULL;
1029         struct port *port = NULL;
1030         uint8_t old_partner_state;
1031
1032         port = &mode_8023ad_ports[slave_id];
1033
1034         ACTOR_STATE_CLR(port, AGGREGATION);
1035         port->selected = UNSELECTED;
1036
1037         old_partner_state = port->partner_state;
1038         record_default(port);
1039
1040         /* Cancel the current_while timer unless the partner timeout state changed */
1041         if (!((old_partner_state ^ port->partner_state) &
1042                         STATE_LACP_SHORT_TIMEOUT))
1043                 timer_cancel(&port->current_while_timer);
1044
1045         PARTNER_STATE_CLR(port, AGGREGATION);
1046         ACTOR_STATE_CLR(port, EXPIRED);
1047
1048         /* flush rx/tx rings */
1049         while (rte_ring_dequeue(port->rx_ring, &pkt) == 0)
1050                 rte_pktmbuf_free((struct rte_mbuf *)pkt);
1051
1052         while (rte_ring_dequeue(port->tx_ring, &pkt) == 0)
1053                         rte_pktmbuf_free((struct rte_mbuf *)pkt);
1054         return 0;
1055 }
1056
1057 void
1058 bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
1059 {
1060         struct bond_dev_private *internals = bond_dev->data->dev_private;
1061         struct ether_addr slave_addr;
1062         struct port *slave, *agg_slave;
1063         uint16_t slave_id, i, j;
1064
1065         bond_mode_8023ad_stop(bond_dev);
1066
1067         for (i = 0; i < internals->active_slave_count; i++) {
1068                 slave_id = internals->active_slaves[i];
1069                 slave = &mode_8023ad_ports[slave_id];
1070                 rte_eth_macaddr_get(slave_id, &slave_addr);
1071
1072                 if (is_same_ether_addr(&slave_addr, &slave->actor.system))
1073                         continue;
1074
1075                 ether_addr_copy(&slave_addr, &slave->actor.system);
1076                 /* Do nothing if this port is not an aggregator. Otherwise set the
1077                  * NTT flag on every port that uses this aggregator. */
1078                 if (slave->aggregator_port_id != slave_id)
1079                         continue;
1080
1081                 for (j = 0; j < internals->active_slave_count; j++) {
1082                         agg_slave = &mode_8023ad_ports[internals->active_slaves[j]];
1083                         if (agg_slave->aggregator_port_id == slave_id)
1084                                 SM_FLAG_SET(agg_slave, NTT);
1085                 }
1086         }
1087
1088         if (bond_dev->data->dev_started)
1089                 bond_mode_8023ad_start(bond_dev);
1090 }
1091
1092 static void
1093 bond_mode_8023ad_conf_get(struct rte_eth_dev *dev,
1094                 struct rte_eth_bond_8023ad_conf *conf)
1095 {
1096         struct bond_dev_private *internals = dev->data->dev_private;
1097         struct mode8023ad_private *mode4 = &internals->mode4;
1098         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1099
1100         conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks;
1101         conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks;
1102         conf->short_timeout_ms = mode4->short_timeout / ms_ticks;
1103         conf->long_timeout_ms = mode4->long_timeout / ms_ticks;
1104         conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks;
1105         conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks;
1106         conf->update_timeout_ms = mode4->update_timeout_us / 1000;
1107         conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks;
1108         conf->slowrx_cb = mode4->slowrx_cb;
1109         conf->agg_selection = mode4->agg_selection;
1110 }
1111
1112 static void
1113 bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf)
1114 {
1115         conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS;
1116         conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS;
1117         conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS;
1118         conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS;
1119         conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS;
1120         conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS;
1121         conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS;
1122         conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS;
1123         conf->slowrx_cb = NULL;
1124         conf->agg_selection = AGG_STABLE;
1125 }
1126
1127 static void
1128 bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4,
1129                 struct rte_eth_bond_8023ad_conf *conf)
1130 {
1131         uint64_t ms_ticks = rte_get_tsc_hz() / 1000;
1132
1133         mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks;
1134         mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks;
1135         mode4->short_timeout = conf->short_timeout_ms * ms_ticks;
1136         mode4->long_timeout = conf->long_timeout_ms * ms_ticks;
1137         mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks;
1138         mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks;
1139         mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks;
1140         mode4->update_timeout_us = conf->update_timeout_ms * 1000;
1141
1142         mode4->dedicated_queues.enabled = 0;
1143         mode4->dedicated_queues.rx_qid = UINT16_MAX;
1144         mode4->dedicated_queues.tx_qid = UINT16_MAX;
1145 }
1146
1147 void
1148 bond_mode_8023ad_setup(struct rte_eth_dev *dev,
1149                 struct rte_eth_bond_8023ad_conf *conf)
1150 {
1151         struct rte_eth_bond_8023ad_conf def_conf;
1152         struct bond_dev_private *internals = dev->data->dev_private;
1153         struct mode8023ad_private *mode4 = &internals->mode4;
1154
1155         if (conf == NULL) {
1156                 conf = &def_conf;
1157                 bond_mode_8023ad_conf_get_default(conf);
1158         }
1159
1160         bond_mode_8023ad_stop(dev);
1161         bond_mode_8023ad_conf_assign(mode4, conf);
1162         mode4->slowrx_cb = conf->slowrx_cb;
1163         mode4->agg_selection = AGG_STABLE;
1164
1165         if (dev->data->dev_started)
1166                 bond_mode_8023ad_start(dev);
1167 }
1168
1169 int
1170 bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev)
1171 {
1172         struct bond_dev_private *internals = bond_dev->data->dev_private;
1173         uint8_t i;
1174
1175         for (i = 0; i < internals->active_slave_count; i++)
1176                 bond_mode_8023ad_activate_slave(bond_dev, i);
1177
1178         return 0;
1179 }
1180
1181 int
1182 bond_mode_8023ad_start(struct rte_eth_dev *bond_dev)
1183 {
1184         struct bond_dev_private *internals = bond_dev->data->dev_private;
1185         struct mode8023ad_private *mode4 = &internals->mode4;
1186         static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000;
1187
1188         if (mode4->slowrx_cb)
1189                 return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb,
1190                                          bond_dev);
1191
1192         return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev);
1193 }
1194
1195 void
1196 bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev)
1197 {
1198         struct bond_dev_private *internals = bond_dev->data->dev_private;
1199         struct mode8023ad_private *mode4 = &internals->mode4;
1200
1201         if (mode4->slowrx_cb) {
1202                 rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb,
1203                                      bond_dev);
1204                 return;
1205         }
1206         rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev);
1207 }
1208
1209 void
1210 bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
1211                                   uint16_t slave_id, struct rte_mbuf *pkt)
1212 {
1213         struct mode8023ad_private *mode4 = &internals->mode4;
1214         struct port *port = &mode_8023ad_ports[slave_id];
1215         struct marker_header *m_hdr;
1216         uint64_t marker_timer, old_marker_timer;
1217         int retval;
1218         uint8_t wrn, subtype;
1219         /* If the packet is a marker, send the response now by reusing the given
1220          * packet and updating only the source MAC (the destination MAC is multicast,
1221          * so don't update it). Other frames are handled later by the state machines */
1222         subtype = rte_pktmbuf_mtod(pkt,
1223                         struct slow_protocol_frame *)->slow_protocol.subtype;
1224
1225         if (subtype == SLOW_SUBTYPE_MARKER) {
1226                 m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *);
1227
1228                 if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) {
1229                         wrn = WRN_UNKNOWN_MARKER_TYPE;
1230                         goto free_out;
1231                 }
1232
1233                 /* Set up the marker timer. Do it in a loop in case of concurrent access. */
1234                 do {
1235                         old_marker_timer = port->rx_marker_timer;
1236                         if (!timer_is_expired(&old_marker_timer)) {
1237                                 wrn = WRN_RX_MARKER_TO_FAST;
1238                                 goto free_out;
1239                         }
1240
1241                         timer_set(&marker_timer, mode4->rx_marker_timeout);
1242                         retval = rte_atomic64_cmpset(&port->rx_marker_timer,
1243                                 old_marker_timer, marker_timer);
1244                 } while (unlikely(retval == 0));
1245
1246                 m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP;
1247                 rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr);
1248
1249                 if (internals->mode4.dedicated_queues.enabled == 0) {
1250                         int retval = rte_ring_enqueue(port->tx_ring, pkt);
1251                         if (retval != 0) {
1252                                 /* reset timer */
1253                                 port->rx_marker_timer = 0;
1254                                 wrn = WRN_TX_QUEUE_FULL;
1255                                 goto free_out;
1256                         }
1257                 } else {
1258                         /* Send packet directly to the slow queue */
1259                         uint16_t tx_count = rte_eth_tx_burst(slave_id,
1260                                         internals->mode4.dedicated_queues.tx_qid,
1261                                         &pkt, 1);
1262                         if (tx_count != 1) {
1263                                 /* reset timer */
1264                                 port->rx_marker_timer = 0;
1265                                 wrn = WRN_TX_QUEUE_FULL;
1266                                 goto free_out;
1267                         }
1268                 }
1269         } else if (likely(subtype == SLOW_SUBTYPE_LACP)) {
1270                 if (internals->mode4.dedicated_queues.enabled == 0) {
1271                         int retval = rte_ring_enqueue(port->rx_ring, pkt);
1272                         if (retval != 0) {
1273                                 /* If the RX ring is full, free the LACPDU mbuf and drop the packet */
1274                                 wrn = WRN_RX_QUEUE_FULL;
1275                                 goto free_out;
1276                         }
1277                 } else
1278                         rx_machine_update(internals, slave_id, pkt);
1279         } else {
1280                 wrn = WRN_UNKNOWN_SLOW_TYPE;
1281                 goto free_out;
1282         }
1283
1284         return;
1285
1286 free_out:
1287         set_warning_flags(port, wrn);
1288         rte_pktmbuf_free(pkt);
1289 }
1290
1291 int
1292 rte_eth_bond_8023ad_conf_get(uint16_t port_id,
1293                 struct rte_eth_bond_8023ad_conf *conf)
1294 {
1295         struct rte_eth_dev *bond_dev;
1296
1297         if (valid_bonded_port_id(port_id) != 0)
1298                 return -EINVAL;
1299
1300         if (conf == NULL)
1301                 return -EINVAL;
1302
1303         bond_dev = &rte_eth_devices[port_id];
1304         bond_mode_8023ad_conf_get(bond_dev, conf);
1305         return 0;
1306 }
1307
1308 int
1309 rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id,
1310                 enum rte_bond_8023ad_agg_selection agg_selection)
1311 {
1312         struct rte_eth_dev *bond_dev;
1313         struct bond_dev_private *internals;
1314         struct mode8023ad_private *mode4;
1315
1316         bond_dev = &rte_eth_devices[port_id];
1317         internals = bond_dev->data->dev_private;
1318
1319         if (valid_bonded_port_id(port_id) != 0)
1320                 return -EINVAL;
1321         if (internals->mode != 4)
1322                 return -EINVAL;
1323
1324         mode4 = &internals->mode4;
1325         if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH
1326                         || agg_selection == AGG_STABLE)
1327                 mode4->agg_selection = agg_selection;
1328         return 0;
1329 }
1330
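/* Return the aggregator selection algorithm of a mode 4 bonded device. */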
1331 int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id)
1332 {
1333         struct rte_eth_dev *bond_dev;
1334         struct bond_dev_private *internals;
1335         struct mode8023ad_private *mode4;
1336
1337         if (valid_bonded_port_id(port_id) != 0)
1338                 return -EINVAL;
1339
1340         bond_dev = &rte_eth_devices[port_id];
1341         internals = bond_dev->data->dev_private;
1342         if (internals->mode != 4)
1343                 return -EINVAL;
1344         mode4 = &internals->mode4;
1345
1346         return mode4->agg_selection;
1347 }
1348
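/* Validate port_id and, when a configuration is supplied, check that every
 * timeout/period is non-zero and that the fast/short intervals are shorter
 * than their slow/long counterparts.
 */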
1351 static int
1352 bond_8023ad_setup_validate(uint16_t port_id,
1353                 struct rte_eth_bond_8023ad_conf *conf)
1354 {
1355         if (valid_bonded_port_id(port_id) != 0)
1356                 return -EINVAL;
1357
1358         if (conf != NULL) {
1359                 /* Basic sanity check */
1360                 if (conf->slow_periodic_ms == 0 ||
1361                                 conf->fast_periodic_ms >= conf->slow_periodic_ms ||
1362                                 conf->long_timeout_ms == 0 ||
1363                                 conf->short_timeout_ms >= conf->long_timeout_ms ||
1364                                 conf->aggregate_wait_timeout_ms == 0 ||
1365                                 conf->tx_period_ms == 0 ||
1366                                 conf->rx_marker_period_ms == 0 ||
1367                                 conf->update_timeout_ms == 0) {
1368                         RTE_LOG(ERR, PMD, "given mode 4 configuration is invalid\n");
1369                         return -EINVAL;
1370                 }
1371         }
1372
1373         return 0;
1374 }
1375
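/* Validate and apply a mode 4 configuration to the bonded device
 * (conf may be NULL).
 */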
1377 int
1378 rte_eth_bond_8023ad_setup(uint16_t port_id,
1379                 struct rte_eth_bond_8023ad_conf *conf)
1380 {
1381         struct rte_eth_dev *bond_dev;
1382         int err;
1383
1384         err = bond_8023ad_setup_validate(port_id, conf);
1385         if (err != 0)
1386                 return err;
1387
1388         bond_dev = &rte_eth_devices[port_id];
1389         bond_mode_8023ad_setup(bond_dev, conf);
1390
1391         return 0;
1392 }
1393
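/* Fill *info with the 802.3AD state of an active slave: selection status,
 * actor/partner parameters and states, and the aggregator port id.
 */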
1398 int
1399 rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
1400                 struct rte_eth_bond_8023ad_slave_info *info)
1401 {
1402         struct rte_eth_dev *bond_dev;
1403         struct bond_dev_private *internals;
1404         struct port *port;
1405
1406         if (info == NULL || valid_bonded_port_id(port_id) != 0 ||
1407                         rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1408                 return -EINVAL;
1409
1410         bond_dev = &rte_eth_devices[port_id];
1411
1412         internals = bond_dev->data->dev_private;
1413         if (find_slave_by_id(internals->active_slaves,
1414                         internals->active_slave_count, slave_id) ==
1415                                 internals->active_slave_count)
1416                 return -EINVAL;
1417
1418         port = &mode_8023ad_ports[slave_id];
1419         info->selected = port->selected;
1420
1421         info->actor_state = port->actor_state;
1422         rte_memcpy(&info->actor, &port->actor, sizeof(port->actor));
1423
1424         info->partner_state = port->partner_state;
1425         rte_memcpy(&info->partner, &port->partner, sizeof(port->partner));
1426
1427         info->agg_port_id = port->aggregator_port_id;
1428         return 0;
1429 }
1430
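/* Common checks for the external state-machine API: the port must be a
 * started bonded device in mode 4 with an external slow-rx callback
 * registered, and slave_id must be one of its active slaves.
 */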
1431 static int
1432 bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id)
1433 {
1434         struct rte_eth_dev *bond_dev;
1435         struct bond_dev_private *internals;
1436         struct mode8023ad_private *mode4;
1437
1438         if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD)
1439                 return -EINVAL;
1440
1441         bond_dev = &rte_eth_devices[port_id];
1442
1443         if (!bond_dev->data->dev_started)
1444                 return -EINVAL;
1445
1446         internals = bond_dev->data->dev_private;
1447         if (find_slave_by_id(internals->active_slaves,
1448                         internals->active_slave_count, slave_id) ==
1449                                 internals->active_slave_count)
1450                 return -EINVAL;
1451
1452         mode4 = &internals->mode4;
1453         if (mode4->slowrx_cb == NULL)
1454                 return -EINVAL;
1455
1456         return 0;
1457 }
1458
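/* Set or clear the COLLECTING bit in the actor state of the given slave. */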
1459 int
1460 rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
1461                                 int enabled)
1462 {
1463         struct port *port;
1464         int res;
1465
1466         res = bond_8023ad_ext_validate(port_id, slave_id);
1467         if (res != 0)
1468                 return res;
1469
1470         port = &mode_8023ad_ports[slave_id];
1471
1472         if (enabled)
1473                 ACTOR_STATE_SET(port, COLLECTING);
1474         else
1475                 ACTOR_STATE_CLR(port, COLLECTING);
1476
1477         return 0;
1478 }
1479
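/* Set or clear the DISTRIBUTING bit in the actor state of the given slave. */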
1480 int
1481 rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
1482                                 int enabled)
1483 {
1484         struct port *port;
1485         int res;
1486
1487         res = bond_8023ad_ext_validate(port_id, slave_id);
1488         if (res != 0)
1489                 return res;
1490
1491         port = &mode_8023ad_ports[slave_id];
1492
1493         if (enabled)
1494                 ACTOR_STATE_SET(port, DISTRIBUTING);
1495         else
1496                 ACTOR_STATE_CLR(port, DISTRIBUTING);
1497
1498         return 0;
1499 }
1500
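/* Return the DISTRIBUTING flag of the slave's actor state (non-zero if set),
 * or a negative errno on validation failure.
 */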
1501 int
1502 rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
1503 {
1504         struct port *port;
1505         int err;
1506
1507         err = bond_8023ad_ext_validate(port_id, slave_id);
1508         if (err != 0)
1509                 return err;
1510
1511         port = &mode_8023ad_ports[slave_id];
1512         return ACTOR_STATE(port, DISTRIBUTING);
1513 }
1514
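/* Return the COLLECTING flag of the slave's actor state (non-zero if set),
 * or a negative errno on validation failure.
 */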
1515 int
1516 rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
1517 {
1518         struct port *port;
1519         int err;
1520
1521         err = bond_8023ad_ext_validate(port_id, slave_id);
1522         if (err != 0)
1523                 return err;
1524
1525         port = &mode_8023ad_ports[slave_id];
1526         return ACTOR_STATE(port, COLLECTING);
1527 }
1528
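/* Enqueue an externally crafted LACPDU on the slave's tx_ring. The mbuf must
 * hold at least a full lacpdu_header with the LACP slow-protocol subtype; on
 * successful enqueue the ring takes ownership of the mbuf.
 */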
1529 int
1530 rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
1531                 struct rte_mbuf *lacp_pkt)
1532 {
1533         struct port *port;
1534         int res;
1535
1536         res = bond_8023ad_ext_validate(port_id, slave_id);
1537         if (res != 0)
1538                 return res;
1539
1540         port = &mode_8023ad_ports[slave_id];
1541
1542         if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
1543                 return -EINVAL;
1544
1545         struct lacpdu_header *lacp;
1546
1547         /* only enqueue LACPDUs */
1548         lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *);
1549         if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP)
1550                 return -EINVAL;
1551
1552         MODE4_DEBUG("sending LACP frame\n");
1553
1554         return rte_ring_enqueue(port->tx_ring, lacp_pkt);
1555 }
1556
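/* Periodic alarm callback used when an external slow-rx callback is
 * registered: drain each active slave's rx_ring, hand the LACP frames to the
 * application callback and re-arm the alarm.
 */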
1557 static void
1558 bond_mode_8023ad_ext_periodic_cb(void *arg)
1559 {
1560         struct rte_eth_dev *bond_dev = arg;
1561         struct bond_dev_private *internals = bond_dev->data->dev_private;
1562         struct mode8023ad_private *mode4 = &internals->mode4;
1563         struct port *port;
1564         void *pkt = NULL;
1565         uint16_t i, slave_id;
1566
1567         for (i = 0; i < internals->active_slave_count; i++) {
1568                 slave_id = internals->active_slaves[i];
1569                 port = &mode_8023ad_ports[slave_id];
1570
1571                 if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
1572                         struct rte_mbuf *lacp_pkt = pkt;
1573                         struct lacpdu_header *lacp;
1574
1575                         lacp = rte_pktmbuf_mtod(lacp_pkt,
1576                                                 struct lacpdu_header *);
1577                         RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP);
1578
1579                         /* This is a LACP frame, so pass it to the rx
1580                          * callback; the callback must free the mbuf.
1581                          */
1582                         mode4->slowrx_cb(slave_id, lacp_pkt);
1583                 }
1584         }
1585
1586         rte_eal_alarm_set(internals->mode4.update_timeout_us,
1587                         bond_mode_8023ad_ext_periodic_cb, arg);
1588 }
1589
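/* Enable dedicated hardware-filtered Rx/Tx queues for LACP control traffic.
 * The bonded device must be stopped and every slave must support filtering of
 * slow-protocol frames. A minimal usage sketch (bond_port_id stands for the
 * application's bonded port id; queue setup details omitted):
 *
 *   rte_eth_dev_stop(bond_port_id);
 *   rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id);
 *   rte_eth_dev_start(bond_port_id);
 */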
1590 int
1591 rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port)
1592 {
1593         int retval = 0;
1594         struct rte_eth_dev *dev = &rte_eth_devices[port];
1595         struct bond_dev_private *internals = (struct bond_dev_private *)
1596                 dev->data->dev_private;
1597
1598         if (check_for_bonded_ethdev(dev) != 0)
1599                 return -1;
1600
1601         if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0)
1602                 return -1;
1603
1604         /* Device must be stopped to set up slow queue */
1605         if (dev->data->dev_started)
1606                 return -1;
1607
1608         internals->mode4.dedicated_queues.enabled = 1;
1609
1610         bond_ethdev_mode_set(dev, internals->mode);
1611         return retval;
1612 }
1613
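/* Disable the dedicated LACP control queues so slow-protocol frames are again
 * handled on the regular Rx/Tx paths. The bonded device must be stopped.
 */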
1614 int
1615 rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port)
1616 {
1617         int retval = 0;
1618         struct rte_eth_dev *dev = &rte_eth_devices[port];
1619         struct bond_dev_private *internals = (struct bond_dev_private *)
1620                 dev->data->dev_private;
1621
1622         if (check_for_bonded_ethdev(dev) != 0)
1623                 return -1;
1624
1625         /* Device must be stopped to remove the slow queue */
1626         if (dev->data->dev_started)
1627                 return -1;
1628
1629         internals->mode4.dedicated_queues.enabled = 0;
1630
1631         bond_ethdev_mode_set(dev, internals->mode);
1632
1633         return retval;
1634 }