bond: Add bonding driver and LACP protocol 92/9892/58
author Steven <sluong@cisco.com>
Wed, 20 Dec 2017 20:43:01 +0000 (12:43 -0800)
committer Damjan Marion <dmarion.lists@gmail.com>
Wed, 21 Mar 2018 21:02:15 +0000 (21:02 +0000)
Add a bonding driver to support creation of a bond interface which is composed
of multiple slave interfaces. The slave interfaces can be physical interfaces
or any virtual interfaces, for example memif interfaces.

The syntax to create a bond interface is
create bond mode <lacp | xor | active-backup | broadcast | round-robin>

To enslave an interface to the bond interface,
enslave interface TenGigabitEthernet6/0/0 to BondEthernet0

Please see src/plugins/lacp/lacp_doc.md for more examples and additional
options.

LACP is a control plane protocol which manages and monitors the status of
the slave interfaces. The protocol is part of the 802.3ad standard. This patch
implements LACPv1. LACPv2 is not supported.
To enable LACP on the bond interface, specify "mode lacp" when the bond
interface is created. The syntax to enslave a slave interface is the same as
other bonding modes.

Change-Id: I06581d3b87635972f9f0e1ec50b67560fc13e26c
Signed-off-by: Steven <sluong@cisco.com>
38 files changed:
src/configure.ac
src/plugins/Makefile.am
src/plugins/lacp.am [new file with mode: 0644]
src/plugins/lacp/cli.c [new file with mode: 0644]
src/plugins/lacp/input.c [new file with mode: 0644]
src/plugins/lacp/lacp.api [new file with mode: 0644]
src/plugins/lacp/lacp.c [new file with mode: 0644]
src/plugins/lacp/lacp_all_api_h.h [new file with mode: 0644]
src/plugins/lacp/lacp_api.c [new file with mode: 0644]
src/plugins/lacp/lacp_doc.md [new file with mode: 0644]
src/plugins/lacp/lacp_msg_enum.h [new file with mode: 0644]
src/plugins/lacp/lacp_test.c [new file with mode: 0644]
src/plugins/lacp/machine.h [new file with mode: 0644]
src/plugins/lacp/mux_machine.c [new file with mode: 0644]
src/plugins/lacp/mux_machine.h [new file with mode: 0644]
src/plugins/lacp/node.c [new file with mode: 0644]
src/plugins/lacp/node.h [new file with mode: 0644]
src/plugins/lacp/protocol.h [new file with mode: 0644]
src/plugins/lacp/ptx_machine.c [new file with mode: 0644]
src/plugins/lacp/ptx_machine.h [new file with mode: 0644]
src/plugins/lacp/rx_machine.c [new file with mode: 0644]
src/plugins/lacp/rx_machine.h [new file with mode: 0644]
src/plugins/lacp/selection.c [new file with mode: 0644]
src/plugins/lacp/tx_machine.c [new file with mode: 0644]
src/plugins/lacp/tx_machine.h [new file with mode: 0644]
src/vat/api_format.c
src/vnet.am
src/vnet/bonding/bond.api [new file with mode: 0644]
src/vnet/bonding/bond_api.c [new file with mode: 0644]
src/vnet/bonding/cli.c [new file with mode: 0644]
src/vnet/bonding/device.c [new file with mode: 0644]
src/vnet/bonding/node.c [new file with mode: 0644]
src/vnet/bonding/node.h [new file with mode: 0644]
src/vnet/vnet_all_api_h.h
src/vpp/api/custom_dump.c
test/test_bond.py [new file with mode: 0644]
test/vpp_bond_interface.py [new file with mode: 0644]
test/vpp_papi_provider.py

index c455423..d0067c0 100644 (file)
@@ -222,6 +222,7 @@ PLUGIN_ENABLED(ioam)
 PLUGIN_ENABLED(ixge)
 PLUGIN_ENABLED(kubeproxy)
 PLUGIN_ENABLED(l2e)
+PLUGIN_ENABLED(lacp)
 PLUGIN_ENABLED(lb)
 PLUGIN_ENABLED(marvell)
 PLUGIN_ENABLED(memif)
index 37b2e25..03a39df 100644 (file)
@@ -71,6 +71,10 @@ if ENABLE_KUBEPROXY_PLUGIN
 include kubeproxy.am
 endif
 
+if ENABLE_LACP_PLUGIN
+include lacp.am
+endif
+
 if ENABLE_LB_PLUGIN
 include lb.am
 endif
diff --git a/src/plugins/lacp.am b/src/plugins/lacp.am
new file mode 100644 (file)
index 0000000..c7e571d
--- /dev/null
@@ -0,0 +1,47 @@
+# Copyright (c) 2017 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Register the LACP plugin library and its test plugin library.
+vppplugins_LTLIBRARIES += lacp_plugin.la
+vppapitestplugins_LTLIBRARIES += lacp_test_plugin.la
+
+# No additional libraries are linked into the plugin.
+lacp_plugin_la_LIBADD =
+# Sources compiled into lacp_plugin.la.
+lacp_plugin_la_SOURCES = lacp/lacp.c   \
+  lacp/lacp_api.c                  \
+  lacp/selection.c                 \
+  lacp/rx_machine.c                \
+  lacp/tx_machine.c                \
+  lacp/mux_machine.c               \
+  lacp/ptx_machine.c               \
+  lacp/cli.c                      \
+  lacp/input.c                    \
+  lacp/node.c
+
+# Sources compiled into lacp_test_plugin.la.
+lacp_test_plugin_la_SOURCES = \
+  lacp/lacp_test.c
+
+# Headers that are part of the build but are not installed.
+noinst_HEADERS += lacp/protocol.h  \
+  lacp/machine.h                   \
+  lacp/rx_machine.h                \
+  lacp/tx_machine.h                \
+  lacp/mux_machine.h               \
+  lacp/ptx_machine.h               \
+  lacp/node.h
+
+# API headers installed with their lacp/ subdirectory preserved.
+nobase_apiinclude_HEADERS +=       \
+  lacp/lacp_all_api_h.h            \
+  lacp/lacp_msg_enum.h             \
+  lacp/lacp.api.h
+
+# API definition file for this plugin.
+API_FILES += lacp/lacp.api
+
+# vi:syntax=automake
diff --git a/src/plugins/lacp/cli.c b/src/plugins/lacp/cli.c
new file mode 100644 (file)
index 0000000..1062777
--- /dev/null
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Build a vector with one lacp_interface_details_t entry for every slave
+ * in bond_main's neighbors pool that has both port_enabled and
+ * lacp_enabled set.
+ *
+ * out_lacpifs - receives the resulting vector; it is NULL when no slave
+ *               qualifies.
+ *               NOTE(review): presumably the caller frees the vector -
+ *               confirm against the callers.
+ *
+ * Always returns 0.
+ */
+int
+lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  slave_if_t *sif;
+  bond_if_t *bif;
+  vnet_hw_interface_t *hi;
+  lacp_interface_details_t *r_lacpifs = NULL;
+  lacp_interface_details_t *lacpif = NULL;
+
+  /* *INDENT-OFF* */
+  pool_foreach (sif, bm->neighbors,
+    /* Skip slaves that are not enabled for LACP. */
+    if ((sif->port_enabled == 0) || (sif->lacp_enabled == 0))
+      continue;
+    /* Append one zeroed entry and fill it in. */
+    vec_add2(r_lacpifs, lacpif, 1);
+    memset (lacpif, 0, sizeof (*lacpif));
+    lacpif->sw_if_index = sif->sw_if_index;
+    hi = vnet_get_hw_interface (vnm, sif->hw_if_index);
+    /*
+     * Copy at most ARRAY_LEN - 1 bytes so the last byte of the zeroed
+     * name array stays 0.
+     */
+    clib_memcpy(lacpif->interface_name, hi->name,
+                MIN (ARRAY_LEN (lacpif->interface_name) - 1,
+                     strlen ((const char *) hi->name)));
+    /* Same treatment for the name of the bond interface the slave is in. */
+    bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+    hi = vnet_get_hw_interface (vnm, bif->hw_if_index);
+    clib_memcpy(lacpif->bond_interface_name, hi->name,
+                MIN (ARRAY_LEN (lacpif->bond_interface_name) - 1,
+                     strlen ((const char *) hi->name)));
+    /* Actor fields are copied as stored, with no byte-order conversion. */
+    clib_memcpy (lacpif->actor_system, sif->actor.system, 6);
+    lacpif->actor_system_priority = sif->actor.system_priority;
+    lacpif->actor_key = sif->actor.key;
+    lacpif->actor_port_priority = sif->actor.port_priority;
+    lacpif->actor_port_number = sif->actor.port_number;
+    lacpif->actor_state = sif->actor.state;
+    /* Partner fields, likewise copied as stored. */
+    clib_memcpy (lacpif->partner_system, sif->partner.system, 6);
+    lacpif->partner_system_priority = sif->partner.system_priority;
+    lacpif->partner_key = sif->partner.key;
+    lacpif->partner_port_priority = sif->partner.port_priority;
+    lacpif->partner_port_number = sif->partner.port_number;
+    lacpif->partner_state = sif->partner.state;
+    /* Current state of the four per-slave state machines. */
+    lacpif->rx_state = sif->rx_state;
+    lacpif->tx_state = sif->tx_state;
+    lacpif->ptx_state = sif->ptx_state;
+    lacpif->mux_state = sif->mux_state;
+  );
+  /* *INDENT-ON* */
+
+  *out_lacpifs = r_lacpifs;
+
+  return 0;
+}
+
+/*
+ * Print the LACP summary for the given slave interfaces: a two-line table
+ * header, then for each LACP-enabled slave one row with the individual
+ * actor/partner state bits (bit 7 down to bit 0), its LAG ID and the
+ * state of its four state machines.
+ *
+ * vm            - vlib main, used for CLI output
+ * sw_if_indices - vector of slave sw_if_index values; when it is NULL
+ *                 nothing at all is printed
+ */
+static void
+show_lacp (vlib_main_t * vm, u32 * sw_if_indices)
+{
+  u32 *swi;
+  slave_if_t *slave;
+  bond_if_t *bond;
+
+  if (sw_if_indices == 0)
+    return;
+
+  vlib_cli_output (vm, "%-55s %-32s %-32s", " ", "actor state",
+                   "partner state");
+  vlib_cli_output (vm, "%-25s %-12s %-16s %-31s  %-31s", "interface name",
+                   "sw_if_index", "bond interface",
+                   "exp/def/dis/col/syn/agg/tim/act",
+                   "exp/def/dis/col/syn/agg/tim/act");
+
+  vec_foreach (swi, sw_if_indices)
+  {
+    slave = bond_get_slave_by_sw_if_index (*swi);
+
+    /* Not a slave, or a slave on which LACP is not running. */
+    if (slave == 0)
+      continue;
+    if (!slave->port_enabled || !slave->lacp_enabled)
+      continue;
+
+    bond = bond_get_master_by_dev_instance (slave->bif_dev_instance);
+
+    /* Summary row: names, then actor bits 7..0, then partner bits 7..0. */
+    vlib_cli_output (vm,
+                     "%-25U %-12d %-16U %3x %3x %3x %3x %3x %3x %3x %3x "
+                     "%4x %3x %3x %3x %3x %3x %3x %3x",
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     slave->sw_if_index, slave->sw_if_index,
+                     format_vnet_sw_if_index_name, vnet_get_main (),
+                     bond->sw_if_index,
+                     lacp_bit_test (slave->actor.state, 7),
+                     lacp_bit_test (slave->actor.state, 6),
+                     lacp_bit_test (slave->actor.state, 5),
+                     lacp_bit_test (slave->actor.state, 4),
+                     lacp_bit_test (slave->actor.state, 3),
+                     lacp_bit_test (slave->actor.state, 2),
+                     lacp_bit_test (slave->actor.state, 1),
+                     lacp_bit_test (slave->actor.state, 0),
+                     lacp_bit_test (slave->partner.state, 7),
+                     lacp_bit_test (slave->partner.state, 6),
+                     lacp_bit_test (slave->partner.state, 5),
+                     lacp_bit_test (slave->partner.state, 4),
+                     lacp_bit_test (slave->partner.state, 3),
+                     lacp_bit_test (slave->partner.state, 2),
+                     lacp_bit_test (slave->partner.state, 1),
+                     lacp_bit_test (slave->partner.state, 0));
+
+    /* LAG ID: the 16-bit fields are passed through ntohs() for display. */
+    vlib_cli_output (vm,
+                     "  LAG ID: "
+                     "[(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x), "
+                     "(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x)]",
+                     ntohs (slave->actor.system_priority),
+                     slave->actor.system[0], slave->actor.system[1],
+                     slave->actor.system[2], slave->actor.system[3],
+                     slave->actor.system[4], slave->actor.system[5],
+                     ntohs (slave->actor.key),
+                     ntohs (slave->actor.port_priority),
+                     ntohs (slave->actor.port_number),
+                     ntohs (slave->partner.system_priority),
+                     slave->partner.system[0], slave->partner.system[1],
+                     slave->partner.system[2], slave->partner.system[3],
+                     slave->partner.system[4], slave->partner.system[5],
+                     ntohs (slave->partner.key),
+                     ntohs (slave->partner.port_priority),
+                     ntohs (slave->partner.port_number));
+
+    vlib_cli_output (vm,
+                     "  RX-state: %U, TX-state: %U, "
+                     "MUX-state: %U, PTX-state: %U",
+                     format_rx_sm_state, slave->rx_state,
+                     format_tx_sm_state, slave->tx_state,
+                     format_mux_sm_state, slave->mux_state,
+                     format_ptx_sm_state, slave->ptx_state);
+  }
+}
+
+/*
+ * Print one "<name> (<bit>)" line for every entry of lacp_state_array
+ * whose bit is set in state. The table is walked until the entry whose
+ * str is NULL.
+ */
+static void
+show_lacp_state_bits (vlib_main_t * vm, u32 state)
+{
+  lacp_state_struct *entry = (lacp_state_struct *) & lacp_state_array;
+
+  for (; entry->str; entry++)
+    {
+      if (state & (1 << entry->bit))
+        vlib_cli_output (vm, "        %s (%d)", entry->str, entry->bit);
+    }
+}
+
+/*
+ * Print the detailed LACP view for the given slave interfaces: flags,
+ * actor and partner parameters with their decoded state bits, the three
+ * timers (time remaining relative to "now", or "not running") and the
+ * state of the four state machines.
+ *
+ * vm            - vlib main, used for CLI output and the current time
+ * sw_if_indices - vector of slave sw_if_index values; NULL prints nothing
+ */
+static void
+show_lacp_details (vlib_main_t * vm, u32 * sw_if_indices)
+{
+  slave_if_t *slave;
+  u32 *swi;
+  f64 t_now;
+
+  if (sw_if_indices == 0)
+    return;
+
+  t_now = vlib_time_now (vm);
+
+  vec_foreach (swi, sw_if_indices)
+  {
+    slave = bond_get_slave_by_sw_if_index (*swi);
+
+    /* Not a slave, or a slave on which LACP is not running. */
+    if (slave == 0)
+      continue;
+    if (!slave->port_enabled || !slave->lacp_enabled)
+      continue;
+
+    vlib_cli_output (vm, "  %U", format_vnet_sw_if_index_name,
+                     vnet_get_main (), slave->sw_if_index);
+    vlib_cli_output (vm, "    debug: %d", slave->debug);
+    vlib_cli_output (vm, "    loopback port: %d", slave->loopback_port);
+    vlib_cli_output (vm, "    port moved: %d", slave->port_moved);
+    vlib_cli_output (vm, "    ready_n: %d", slave->ready_n);
+    vlib_cli_output (vm, "    ready: %d", slave->ready);
+
+    /* Actor parameters followed by the decoded actor state bits. */
+    vlib_cli_output (vm, "    Actor");
+    vlib_cli_output (vm, "      system: %U",
+                     format_ethernet_address, slave->actor.system);
+    vlib_cli_output (vm, "      system priority: %u",
+                     ntohs (slave->actor.system_priority));
+    vlib_cli_output (vm, "      key: %u", ntohs (slave->actor.key));
+    vlib_cli_output (vm, "      port priority: %u",
+                     ntohs (slave->actor.port_priority));
+    vlib_cli_output (vm, "      port number: %u",
+                     ntohs (slave->actor.port_number));
+    vlib_cli_output (vm, "      state: 0x%x", slave->actor.state);
+    show_lacp_state_bits (vm, slave->actor.state);
+
+    /* Partner parameters followed by the decoded partner state bits. */
+    vlib_cli_output (vm, "    Partner");
+    vlib_cli_output (vm, "      system: %U",
+                     format_ethernet_address, slave->partner.system);
+    vlib_cli_output (vm, "      system priority: %u",
+                     ntohs (slave->partner.system_priority));
+    vlib_cli_output (vm, "      key: %u", ntohs (slave->partner.key));
+    vlib_cli_output (vm, "      port priority: %u",
+                     ntohs (slave->partner.port_priority));
+    vlib_cli_output (vm, "      port number: %u",
+                     ntohs (slave->partner.port_number));
+    vlib_cli_output (vm, "      state: 0x%x", slave->partner.state);
+    show_lacp_state_bits (vm, slave->partner.state);
+
+    /* Timers: value minus "now" while running. */
+    if (lacp_timer_is_running (slave->wait_while_timer))
+      vlib_cli_output (vm, "      wait while timer: %=10.2f seconds",
+                       slave->wait_while_timer - t_now);
+    else
+      vlib_cli_output (vm, "      wait while timer: not running");
+
+    if (lacp_timer_is_running (slave->current_while_timer))
+      vlib_cli_output (vm, "      current while timer: %=10.2f seconds",
+                       slave->current_while_timer - t_now);
+    else
+      vlib_cli_output (vm, "      current while timer: not running");
+
+    if (lacp_timer_is_running (slave->periodic_timer))
+      vlib_cli_output (vm, "      periodic timer: %=10.2f seconds",
+                       slave->periodic_timer - t_now);
+    else
+      vlib_cli_output (vm, "      periodic timer: not running");
+
+    vlib_cli_output (vm, "    RX-state: %U", format_rx_sm_state,
+                     slave->rx_state);
+    vlib_cli_output (vm, "    TX-state: %U", format_tx_sm_state,
+                     slave->tx_state);
+    vlib_cli_output (vm, "    MUX-state: %U", format_mux_sm_state,
+                     slave->mux_state);
+    vlib_cli_output (vm, "    PTX-state: %U", format_ptx_sm_state,
+                     slave->ptx_state);
+    vlib_cli_output (vm, "\n");
+  }
+}
+
+/*
+ * CLI handler for "show lacp [<interface>] [details]".
+ *
+ * Every interface named on the command line must be an enslaved
+ * interface, otherwise "interface is not enslaved" is returned. When no
+ * interface is given, all slaves in bond_main's neighbors pool are shown.
+ * "details" selects show_lacp_details() instead of show_lacp().
+ *
+ * Returns 0 on success or a clib error describing the bad input.
+ */
+static clib_error_t *
+show_lacp_fn (vlib_main_t * vm, unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = &vnet_main;
+  vnet_hw_interface_t *hi;
+  slave_if_t *sif;
+  clib_error_t *error = 0;
+  u8 details = 0;
+  u32 hw_if_index, *sw_if_indices = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+          (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+        {
+          /*
+           * The parser yields a hardware interface index. Resolve it
+           * through the hardware interface instead of using it as an
+           * index into the software interface pool, which is a different
+           * index space.
+           */
+          hi = vnet_get_hw_interface (vnm, hw_if_index);
+          sif = bond_get_slave_by_sw_if_index (hi->sw_if_index);
+          if (!sif)
+            {
+              error = clib_error_return (0, "interface is not enslaved");
+              goto done;
+            }
+          vec_add1 (sw_if_indices, sif->sw_if_index);
+        }
+      else if (unformat (input, "details"))
+        details = 1;
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  /* No interface named: show every slave. */
+  if (vec_len (sw_if_indices) == 0)
+    {
+      pool_foreach (sif, bm->neighbors,
+                    vec_add1 (sw_if_indices, sif->sw_if_index);
+        );
+    }
+
+  if (details)
+    show_lacp_details (vm, sw_if_indices);
+  else
+    show_lacp (vm, sw_if_indices);
+
+done:
+  vec_free (sw_if_indices);
+  return error;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: the "show lacp" path is handled by show_lacp_fn. */
+VLIB_CLI_COMMAND (show_lacp_command, static) = {
+  .path = "show lacp",
+  .short_help = "show lacp [<interface>] [details]",
+  .function = show_lacp_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "debug lacp [<interface>] <on | off>".
+ *
+ * With an interface, sets the per-slave debug flag; the interface must
+ * already be enslaved. Without an interface, sets the global
+ * lacp_main.debug flag. An interface is only accepted before on/off;
+ * anything following on/off is rejected as unknown input.
+ *
+ * Returns 0 on success or a clib error. The line input is released on
+ * every path once it has been acquired.
+ */
+static clib_error_t *
+debug_lacp_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  lacp_main_t *lm = &lacp_main;
+  u8 onoff = 0;
+  u8 input_found = 0;
+  u32 hw_if_index = ~0;
+  slave_if_t *sif;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hi;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "missing argument");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      /*
+       * An optional interface may precede on/off. Whether or not one was
+       * consumed, fall through to the on/off checks below.
+       */
+      if (unformat (line_input, "%U",
+                    unformat_vnet_hw_interface, vnm, &hw_if_index))
+        ;
+      if (input_found)
+        {
+          /* on/off was already seen: nothing else may follow it. */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+      else if (unformat (line_input, "on"))
+        {
+          input_found = 1;
+          onoff = 1;
+        }
+      else if (unformat (line_input, "off"))
+        {
+          input_found = 1;
+          onoff = 0;
+        }
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (!input_found)
+    {
+      /* Leave through "done" so that line_input is freed. */
+      error = clib_error_return (0, "must specify on or off");
+      goto done;
+    }
+
+  if (hw_if_index != ~0)
+    {
+      /*
+       * The parser yields a hardware interface index. Resolve it through
+       * the hardware interface instead of using it as an index into the
+       * software interface pool, which is a different index space.
+       */
+      hi = vnet_get_hw_interface (vnm, hw_if_index);
+      sif = bond_get_slave_by_sw_if_index (hi->sw_if_index);
+      if (!sif)
+        {
+          error =
+            clib_error_return (0, "Please enslave the interface first");
+          goto done;
+        }
+      sif->debug = onoff;
+    }
+  else
+    lm->debug = onoff;
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/* *INDENT-OFF* */
+/*
+ * CLI registration: the "debug lacp" path is handled by
+ * debug_lacp_command_fn.
+ * NOTE(review): the handler treats the interface as optional (without
+ * one it sets lacp_main.debug), which short_help does not show.
+ */
+VLIB_CLI_COMMAND (debug_lacp_command, static) = {
+    .path = "debug lacp",
+    .short_help = "debug lacp <interface> <on | off>",
+    .function = debug_lacp_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function: record the vlib main passed in and the vnet main in
+ * lacp_main. Always succeeds.
+ */
+clib_error_t *
+lacp_cli_init (vlib_main_t * vm)
+{
+  lacp_main.vlib_main = vm;
+  lacp_main.vnet_main = vnet_get_main ();
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (lacp_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/input.c b/src/plugins/lacp/input.c
new file mode 100644 (file)
index 0000000..45db3b8
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Validate the LACPDU held in sif->last_rx_pkt and, if it is acceptable,
+ * feed a PDU-received event to the RX state machine.
+ *
+ * Returns LACP_ERROR_UNSUPPORTED when the subtype is not LACP,
+ * LACP_ERROR_BAD_TLV when any TLV length is not the expected one, and
+ * LACP_ERROR_NONE otherwise.
+ */
+static int
+lacp_packet_scan (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_pdu_t *pdu = (lacp_pdu_t *) sif->last_rx_pkt;
+  int tlv_lengths_ok;
+
+  if (pdu->subtype != LACP_SUBTYPE)
+    return LACP_ERROR_UNSUPPORTED;
+
+  /*
+   * According to the spec, no checking on the version number and tlv types.
+   * But we may check the tlv lengths.
+   */
+  tlv_lengths_ok =
+    (pdu->actor.tlv_length == sizeof (lacp_actor_partner_t)) &&
+    (pdu->partner.tlv_length == sizeof (lacp_actor_partner_t)) &&
+    (pdu->collector.tlv_length == sizeof (lacp_collector_t)) &&
+    (pdu->terminator.tlv_length == 0);
+  if (!tlv_lengths_ok)
+    return LACP_ERROR_BAD_TLV;
+
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state);
+
+  return LACP_ERROR_NONE;
+}
+
+/*
+ * Fill the marker information of a response PDU: copy it from the marker
+ * PDU stored in sif->last_marker_pkt and retag it as a response.
+ */
+static void
+marker_fill_pdu (marker_pdu_t * marker, slave_if_t * sif)
+{
+  marker_pdu_t *request = (marker_pdu_t *) sif->last_marker_pkt;
+
+  clib_memcpy (&marker->marker_info, &request->marker_info,
+               sizeof (marker->marker_info));
+  marker->marker_info.tlv_type = MARKER_RESPONSE_INFORMATION;
+}
+
+/*
+ * Fill the marker information of a request PDU from the slave's actor
+ * port number and system, stamping it with the slave's current marker
+ * transaction id, which is then incremented for the next request.
+ */
+void
+marker_fill_request_pdu (marker_pdu_t * marker, slave_if_t * sif)
+{
+  clib_memcpy (marker->marker_info.requester_system, sif->actor.system, 6);
+  marker->marker_info.requester_port = sif->actor.port_number;
+  marker->marker_info.requester_transaction_id = sif->marker_tx_id++;
+  marker->marker_info.tlv_type = MARKER_INFORMATION;
+}
+
+/*
+ * Build a marker response for the marker PDU last stored on this slave
+ * and send it out of the slave's interface.
+ *
+ * The packet comes from the marker packet template selected by
+ * sif->packet_template_index. If no packet can be obtained from the
+ * template the function returns without sending anything.
+ */
+static void
+send_ethernet_marker_response_pdu (slave_if_t * sif)
+{
+  lacp_main_t *lm = &lacp_main;
+  u32 *to_next;
+  ethernet_marker_pdu_t *h0;
+  vnet_hw_interface_t *hw;
+  u32 bi0;
+  vlib_buffer_t *b0;
+  vlib_frame_t *f;
+  vlib_main_t *vm = lm->vlib_main;
+  vnet_main_t *vnm = lm->vnet_main;
+
+  /*
+   * see lacp_periodic_init() to understand what's already painted
+   * into the buffer by the packet template mechanism
+   */
+  h0 = vlib_packet_template_get_packet
+    (vm, &lm->marker_packet_templates[sif->packet_template_index], &bi0);
+
+  /* No packet available: silently give up on this response. */
+  if (!h0)
+    return;
+
+  /* Add the interface's ethernet source address */
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+
+  clib_memcpy (h0->ethernet.src_address, hw->hw_address,
+              vec_len (hw->hw_address));
+
+  /* Marker information is taken from the stored request, see marker_fill_pdu */
+  marker_fill_pdu (&h0->marker, sif);
+
+  /* Set the outbound packet length */
+  b0 = vlib_get_buffer (vm, bi0);
+  b0->current_length = sizeof (ethernet_marker_pdu_t);
+  b0->current_data = 0;
+  b0->total_length_not_including_first_buffer = 0;
+
+  /* And the outbound interface */
+  vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index;
+
+  /* And output the packet on the correct interface */
+  f = vlib_get_frame_to_node (vm, hw->output_node_index);
+
+  /* The frame carries exactly this one buffer. */
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi0;
+  f->n_vectors = 1;
+
+  vlib_put_frame_to_node (vm, hw->output_node_index, f);
+}
+
+/*
+ * Check the marker PDU stored in sif->last_marker_pkt and answer it.
+ *
+ * Returns LACP_ERROR_BAD_TLV when a TLV length is not the expected one;
+ * otherwise sends a marker response and returns LACP_ERROR_NONE.
+ */
+static int
+handle_marker_protocol (vlib_main_t * vm, slave_if_t * sif)
+{
+  marker_pdu_t *pdu = (marker_pdu_t *) sif->last_marker_pkt;
+
+  /*
+   * According to the spec, no checking on the version number and tlv types.
+   * But we may check the tlv lengths.
+   */
+  if ((pdu->marker_info.tlv_length == sizeof (marker_information_t)) &&
+      (pdu->terminator.tlv_length == 0))
+    {
+      send_ethernet_marker_response_pdu (sif);
+      return LACP_ERROR_NONE;
+    }
+
+  return LACP_ERROR_BAD_TLV;
+}
+
+/*
+ * lacp input routine
+ *
+ * Handles one received slow-protocol PDU for the slave that owns the RX
+ * interface of buffer b0 (buffer index bi0).
+ *
+ * - Returns LACP_ERROR_DISABLED when the RX interface is not a slave or
+ *   the slave is not in LACP mode.
+ * - Marker PDUs are copied to sif->last_marker_pkt and answered by
+ *   handle_marker_protocol().
+ * - Other PDUs are copied to sif->last_rx_pkt. If the copy hashes to the
+ *   same signature as the previous PDU and the slave is an active slave
+ *   of its bond, only the current-while timer is restarted
+ *   (LACP_ERROR_CACHE_HIT); otherwise the PDU is scanned by
+ *   lacp_packet_scan().
+ * - Returns LACP_ERROR_TOO_SMALL when the copied data is shorter than
+ *   the PDU type it is going to be parsed as.
+ */
+lacp_error_t
+lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
+{
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif;
+  uword nbytes;
+  lacp_error_t e;
+  marker_pdu_t *marker;
+  uword last_packet_signature;
+  bond_if_t *bif;
+
+  sif =
+    bond_get_slave_by_sw_if_index (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+  if ((sif == 0) || (sif->mode != BOND_MODE_LACP))
+    {
+      return LACP_ERROR_DISABLED;
+    }
+
+  /* Handle marker protocol */
+  marker = (marker_pdu_t *) (b0->data + b0->current_data);
+  if (marker->subtype == MARKER_SUBTYPE)
+    {
+      /* Reuse the per-slave marker buffer, same idiom as for rx below. */
+      if (sif->last_marker_pkt)
+        _vec_len (sif->last_marker_pkt) = 0;
+      vec_validate (sif->last_marker_pkt,
+                    vlib_buffer_length_in_chain (vm, b0) - 1);
+      nbytes = vlib_buffer_contents (vm, bi0, sif->last_marker_pkt);
+      ASSERT (nbytes <= vec_len (sif->last_marker_pkt));
+      /*
+       * The stored packet is parsed as a marker_pdu_t, so that is the
+       * size it has to be checked against.
+       */
+      if (nbytes < sizeof (marker_pdu_t))
+        return LACP_ERROR_TOO_SMALL;
+      return (handle_marker_protocol (vm, sif));
+    }
+
+  /*
+   * typical clib idiom. Don't repeatedly allocate and free
+   * the per-neighbor rx buffer. Reset its apparent length to zero
+   * and reuse it.
+   */
+  if (sif->last_rx_pkt)
+    _vec_len (sif->last_rx_pkt) = 0;
+
+  /*
+   * Make sure the per-neighbor rx buffer is big enough to hold
+   * the data we're about to copy
+   */
+  vec_validate (sif->last_rx_pkt, vlib_buffer_length_in_chain (vm, b0) - 1);
+
+  /*
+   * Coalesce / copy the buffer chain into the per-neighbor
+   * rx buffer
+   */
+  nbytes = vlib_buffer_contents (vm, bi0, sif->last_rx_pkt);
+  ASSERT (nbytes <= vec_len (sif->last_rx_pkt));
+
+  if (nbytes < sizeof (lacp_pdu_t))
+    {
+      return LACP_ERROR_TOO_SMALL;
+    }
+
+  last_packet_signature =
+    hash_memory (sif->last_rx_pkt, vec_len (sif->last_rx_pkt), 0xd00b);
+
+  /*
+   * An unchanged PDU from an active slave only needs its current-while
+   * timer restarted; there is no need to run the state machine again.
+   */
+  bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+  if (sif->last_packet_signature_valid &&
+      (sif->last_packet_signature == last_packet_signature) &&
+      hash_get (bif->active_slave_by_sw_if_index, sif->sw_if_index))
+    {
+      lacp_start_current_while_timer (lm->vlib_main, sif,
+                                      sif->ttl_in_seconds);
+      e = LACP_ERROR_CACHE_HIT;
+    }
+  else
+    {
+      /* Actually scan the packet */
+      e = lacp_packet_scan (vm, sif);
+      sif->last_packet_signature_valid = 1;
+      sif->last_packet_signature = last_packet_signature;
+    }
+
+  /* The stored copy is not needed once the PDU has been handled. */
+  if (sif->last_rx_pkt)
+    _vec_len (sif->last_rx_pkt) = 0;
+
+  return e;
+}
+
+/*
+ * Init function: make sure lacp_periodic_init has been called and pass
+ * on any error it reports.
+ */
+static clib_error_t *
+lacp_init (vlib_main_t * vm)
+{
+  clib_error_t *err = vlib_call_init_function (vm, lacp_periodic_init);
+
+  if (err)
+    return err;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lacp_init);
+
+/*
+ * packet trace format function
+ *
+ * Takes (vlib_main_t *, vlib_node_t *, lacp_input_trace_t *) from args;
+ * only the trace record is used. Appends to s and returns it:
+ *   - the captured length,
+ *   - when the length is at least sizeof (lacp_pdu_t), a field-by-field
+ *     decode of the PDU as either a marker PDU or a LACPDU, chosen by the
+ *     subtype (other subtypes are not decoded),
+ *   - a hex dump of the first min (length, sizeof (lacp_pdu_t)) bytes.
+ */
+u8 *
+lacp_input_format_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  lacp_input_trace_t *t = va_arg (*args, lacp_input_trace_t *);
+  lacp_pdu_t *lacpdu = &t->pkt.lacpdu;
+  marker_pdu_t *marker = &t->pkt.marker;
+  int i, len;
+  u8 *p;
+  lacp_state_struct *state_entry;
+
+  s = format (s, "Length: %d\n", t->len);
+  /* Shorter captures are not decoded, only hex dumped below. */
+  if (t->len >= sizeof (lacp_pdu_t))
+    {
+      switch (lacpdu->subtype)
+       {
+       case MARKER_SUBTYPE:
+         if (marker->version_number == MARKER_PROTOCOL_VERSION)
+           s = format (s, "  Markerv1\n");
+         else
+           s = format (s, "  Subtype %u, Version %u\n", marker->subtype,
+                       marker->version_number);
+         s = format (s, "  Marker Information TLV: type %u\n",
+                     marker->marker_info.tlv_type);
+         s = format (s, "  Marker Information TLV: length %u\n",
+                     marker->marker_info.tlv_length);
+         s = format (s, "  Requester port: %u\n",
+                     marker->marker_info.requester_port);
+         s = format (s, "  Requester system: %U\n", format_ethernet_address,
+                     marker->marker_info.requester_system);
+         s = format (s, "  Requester transaction ID: %u\n",
+                     marker->marker_info.requester_transaction_id);
+         break;
+
+       case LACP_SUBTYPE:
+         if (lacpdu->version_number == LACP_ACTOR_LACP_VERSION)
+           s = format (s, "  LACPv1\n");
+         else
+           s = format (s, "  Subtype %u, Version %u\n", lacpdu->subtype,
+                       lacpdu->version_number);
+         s = format (s, "  Actor Information TLV: length %u\n",
+                     lacpdu->actor.tlv_length);
+         s = format (s, "    System %U\n", format_ethernet_address,
+                     lacpdu->actor.port_info.system);
+         s = format (s, "    System priority %u\n",
+                     ntohs (lacpdu->actor.port_info.system_priority));
+         s = format (s, "    Key %u\n", ntohs (lacpdu->actor.port_info.key));
+         s = format (s, "    Port priority %u\n",
+                     ntohs (lacpdu->actor.port_info.port_priority));
+         s = format (s, "    Port number %u\n",
+                     ntohs (lacpdu->actor.port_info.port_number));
+         s = format (s, "    State 0x%x\n", lacpdu->actor.port_info.state);
+         /* Name every actor state bit that is set. */
+         state_entry = (lacp_state_struct *) & lacp_state_array;
+         while (state_entry->str)
+           {
+             if (lacpdu->actor.port_info.state & (1 << state_entry->bit))
+               s = format (s, "      %s (%d)\n", state_entry->str,
+                           state_entry->bit);
+             state_entry++;
+           }
+
+         s = format (s, "  Partner Information TLV: length %u\n",
+                     lacpdu->partner.tlv_length);
+         s = format (s, "    System %U\n", format_ethernet_address,
+                     lacpdu->partner.port_info.system);
+         s = format (s, "    System priority %u\n",
+                     ntohs (lacpdu->partner.port_info.system_priority));
+         s =
+           format (s, "    Key %u\n", ntohs (lacpdu->partner.port_info.key));
+         s =
+           format (s, "    Port priority %u\n",
+                   ntohs (lacpdu->partner.port_info.port_priority));
+         s =
+           format (s, "    Port number %u\n",
+                   ntohs (lacpdu->partner.port_info.port_number));
+         s = format (s, "    State 0x%x\n", lacpdu->partner.port_info.state);
+         /* Name every partner state bit that is set. */
+         state_entry = (lacp_state_struct *) & lacp_state_array;
+         while (state_entry->str)
+           {
+             if (lacpdu->partner.port_info.state & (1 << state_entry->bit))
+               s = format (s, "      %s (%d)\n", state_entry->str,
+                           state_entry->bit);
+             state_entry++;
+           }
+         break;
+
+       default:
+         break;
+       }
+    }
+
+  /* Hex dump: never more than sizeof (lacp_pdu_t) bytes. */
+  if (t->len > sizeof (lacp_pdu_t))
+    len = sizeof (lacp_pdu_t);
+  else
+    len = t->len;
+  p = (u8 *) lacpdu;
+  for (i = 0; i < len; i++)
+    {
+      /* New row with an offset label every 16 bytes. */
+      if ((i % 16) == 0)
+       {
+         if (i)
+           s = format (s, "\n");
+         s = format (s, "  0x%04x: ", i);
+       }
+      /* Group the bytes in pairs. */
+      if ((i % 2) == 0)
+       s = format (s, " ");
+      s = format (s, "%02x", p[i]);
+    }
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/lacp.api b/src/plugins/lacp/lacp.api
new file mode 100644 (file)
index 0000000..9eb5c7e
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+    This file defines vpe control-plane API messages for
+    the LACP plugin
+*/
+
+option version = "1.0.0";
+
+/** \brief Dump lacp interfaces request
+    @param client_index - identifies the requesting client; the handler
+                          uses it to look up where to send the replies
+    @param context - sender context, echoed back in every details reply
+*/
+define sw_interface_lacp_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply for lacp dump request
+    @param context - sender context, copied from the dump request
+    @param sw_if_index - software index of slave interface
+    @param interface_name - name of slave interface
+    @param rx_state - rx machine state
+    @param tx_state - tx machine state
+    @param mux_state - mux machine state
+    @param ptx_state - ptx machine state
+    @param bond_interface_name - name of bond interface
+    @param actor_system_priority - actor system priority
+    @param actor_system - actor system
+    @param actor_key - actor key
+    @param actor_port_priority - actor port priority
+    @param actor_port_number - actor port number
+    @param actor_state - actor state
+    @param partner_system_priority - partner system priority
+    @param partner_system - partner system
+    @param partner_key - partner key
+    @param partner_port_priority - partner port priority
+    @param partner_port_number - partner port number
+    @param partner_state - partner state
+*/
+define sw_interface_lacp_details
+{
+  u32 context;
+  u32 sw_if_index;
+  u8 interface_name[64];
+  u32 rx_state;
+  u32 tx_state;
+  u32 mux_state;
+  u32 ptx_state;
+  u8 bond_interface_name[64];
+  u16 actor_system_priority;
+  u8 actor_system[6];
+  u16 actor_key;
+  u16 actor_port_priority;
+  u16 actor_port_number;
+  u8 actor_state;
+  u16 partner_system_priority;
+  u8 partner_system[6];
+  u16 partner_key;
+  u16 partner_port_priority;
+  u16 partner_port_number;
+  u8 partner_state;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/lacp.c b/src/plugins/lacp/lacp.c
new file mode 100644 (file)
index 0000000..5fe505a
--- /dev/null
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <vppinfra/hash.h>
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+lacp_main_t lacp_main;
+
+/*
+ * Populate the actor and partner TLV payloads of an outgoing LACPDU
+ * from the port information currently held for the slave interface.
+ */
+static void
+lacp_fill_pdu (lacp_pdu_t * lacpdu, slave_if_t * sif)
+{
+  lacp_pdu_t *pdu = lacpdu;
+
+  /* Partner TLV: the partner information recorded on the slave */
+  pdu->partner.port_info = sif->partner;
+
+  /* Actor TLV: the slave's own port information */
+  pdu->actor.port_info = sif->actor;
+}
+
+/*
+ * Send one LACPDU on the ethernet interface backing the slave.
+ *
+ * A packet is taken from the slave's packet template, the source MAC and
+ * the actor/partner port information are filled in, and the buffer is
+ * handed to the hardware interface's output node as a one-packet frame.
+ * sif->last_lacpdu_time is updated after the frame is queued. If no
+ * packet can be obtained from the template, the function returns without
+ * sending anything and without touching last_lacpdu_time.
+ */
+static void
+lacp_send_ethernet_lacp_pdu (slave_if_t * sif)
+{
+  lacp_main_t *lm = &lacp_main;
+  u32 *to_next;
+  ethernet_lacp_pdu_t *h0;
+  vnet_hw_interface_t *hw;
+  u32 bi0;
+  vlib_buffer_t *b0;
+  vlib_frame_t *f;
+  vlib_main_t *vm = lm->vlib_main;
+  vnet_main_t *vnm = lm->vnet_main;
+
+  /*
+   * see lacp_periodic_init() to understand what's already painted
+   * into the buffer by the packet template mechanism
+   */
+  h0 = vlib_packet_template_get_packet
+    (vm, &lm->packet_templates[sif->packet_template_index], &bi0);
+
+  /* No packet available from the template: silently skip this PDU */
+  if (!h0)
+    return;
+
+  /* Add the interface's ethernet source address */
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+
+  /*
+   * NOTE(review): copies vec_len (hw->hw_address) bytes; this assumes
+   * the hardware address is never longer than src_address - confirm.
+   */
+  clib_memcpy (h0->ethernet.src_address, hw->hw_address,
+              vec_len (hw->hw_address));
+
+  /* Fill in the actor and partner port information for this slave */
+  lacp_fill_pdu (&h0->lacp, sif);
+
+  /* Set the outbound packet length */
+  b0 = vlib_get_buffer (vm, bi0);
+  b0->current_length = sizeof (ethernet_lacp_pdu_t);
+  b0->current_data = 0;
+  b0->total_length_not_including_first_buffer = 0;
+
+  /* And the outbound interface */
+  vnet_buffer (b0)->sw_if_index[VLIB_TX] = hw->sw_if_index;
+
+  /* And output the packet on the correct interface */
+  f = vlib_get_frame_to_node (vm, hw->output_node_index);
+
+  /* The frame carries exactly one buffer index */
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi0;
+  f->n_vectors = 1;
+
+  vlib_put_frame_to_node (vm, hw->output_node_index, f);
+
+  /* Remember when the last LACPDU was sent on this slave */
+  sif->last_lacpdu_time = vlib_time_now (vm);
+}
+
+/*
+ * Choose the packet template used to build LACPDUs for this slave.
+ *
+ * Only the ethernet template exists, so it is always selected.
+ * Returns 0; lacp_send_lacp_pdu () treats a non-zero result as failure.
+ */
+static int
+lacp_pick_packet_template (slave_if_t * sif)
+{
+  int rv = 0;
+
+  sif->packet_template_index = LACP_PACKET_TEMPLATE_ETHERNET;
+
+  return rv;
+}
+
+/*
+ * Transmit a LACPDU for the slave and re-arm its periodic timer.
+ *
+ * The periodic timer is stopped, and nothing is sent, when the slave is
+ * not in LACP mode or when no packet template can be picked for it.
+ * Otherwise the PDU is sent and the periodic timer is restarted with the
+ * slow or fast period depending on sif->is_long_timeout.
+ */
+void
+lacp_send_lacp_pdu (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_main_t *lm = &lacp_main;
+
+  /*
+   * Give up when the slave is not running LACP, or when it has no
+   * template yet and one cannot be picked: if we don't know how to talk
+   * to this peer, don't try again. The template is only picked after the
+   * mode check passes (short-circuit evaluation).
+   */
+  if (sif->mode != BOND_MODE_LACP ||
+      (sif->packet_template_index == (u8) ~ 0 &&
+       lacp_pick_packet_template (sif)))
+    {
+      lacp_stop_timer (&sif->periodic_timer);
+      return;
+    }
+
+  if (sif->packet_template_index == LACP_PACKET_TEMPLATE_ETHERNET)
+    {
+      lacp_send_ethernet_lacp_pdu (sif);
+    }
+  else
+    {
+      /* No other template is known */
+      ASSERT (0);
+    }
+
+  if (sif->is_long_timeout)
+    lacp_start_periodic_timer (lm->vlib_main, sif,
+                              LACP_SLOW_PERIODIC_TIMER);
+  else
+    lacp_start_periodic_timer (lm->vlib_main, sif,
+                              LACP_FAST_PERIODIC_TIMER);
+}
+
+/*
+ * Walk every slave in bond_main.neighbors and service its LACP timers.
+ *
+ * Slaves whose port is not enabled are skipped. For the others:
+ *  - an expired current_while timer raises TIMER_EXPIRED on the rx machine
+ *  - an expired periodic timer raises TIMER_EXPIRED on the ptx machine
+ *  - an expired wait_while timer sets ready_n, stops the timer and runs
+ *    the selection logic
+ */
+void
+lacp_periodic (vlib_main_t * vm)
+{
+  bond_main_t *bm = &bond_main;
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif;
+
+  /* *INDENT-OFF* */
+  pool_foreach (sif, bm->neighbors,
+  ({
+    /* Nothing to do for a disabled port */
+    if (sif->port_enabled == 0)
+      continue;
+
+    /* current_while timer -> receive machine */
+    if (lacp_timer_is_running (sif->current_while_timer) &&
+       lacp_timer_is_expired (lm->vlib_main, sif->current_while_timer))
+      {
+        lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                              LACP_RX_EVENT_TIMER_EXPIRED, &sif->rx_state);
+      }
+
+    /* periodic timer -> periodic transmission machine */
+    if (lacp_timer_is_running (sif->periodic_timer) &&
+       lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer))
+      {
+        lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                              LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state);
+      }
+    /* wait_while timer -> mark the port ready and run selection */
+    if (lacp_timer_is_running (sif->wait_while_timer) &&
+       lacp_timer_is_expired (lm->vlib_main, sif->wait_while_timer))
+      {
+       sif->ready_n = 1;
+        lacp_stop_timer (&sif->wait_while_timer);
+        lacp_selection_logic (vm, sif);
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Enable or disable LACP on a slave interface. Registered with the
+ * bonding driver through bond_register_callback ().
+ *
+ * enable:  take the first clear bit of the bond's port number bitmap as
+ *          the port number, initialize the neighbor state and the state
+ *          machines, and signal LACP_PROCESS_EVENT_START to the lacp
+ *          process when this is the first LACP interface.
+ * disable: drop the interface count and signal LACP_PROCESS_EVENT_STOP
+ *          when the last LACP interface goes away.
+ *
+ * @param vm      vlib main used to signal the lacp process
+ * @param bif     bond interface owning the slave (used on enable only)
+ * @param sif     slave interface being enabled/disabled
+ * @param enable  non-zero to enable, zero to disable
+ */
+static void
+lacp_interface_enable_disable (vlib_main_t * vm, bond_if_t * bif,
+                              slave_if_t * sif, u8 enable)
+{
+  lacp_main_t *lm = &lacp_main;
+  uword port_number;
+
+  if (enable)
+    {
+      port_number = clib_bitmap_first_clear (bif->port_number_bitmap);
+      bif->port_number_bitmap = clib_bitmap_set (bif->port_number_bitmap,
+                                                port_number, 1);
+      // bitmap starts at 0. Our port number starts at 1.
+      lacp_init_neighbor (sif, bif->hw_address, port_number + 1, sif->group);
+      lacp_init_state_machines (vm, sif);
+      lm->lacp_int++;
+      if (lm->lacp_int == 1)
+       {
+         vlib_process_signal_event (vm, lm->lacp_process_node_index,
+                                    LACP_PROCESS_EVENT_START, 0);
+       }
+    }
+  else if (lm->lacp_int > 0)
+    {
+      /*
+       * Only count down while at least one interface is accounted for.
+       * An unmatched disable must not push the counter past zero,
+       * otherwise the "== 1" / "== 0" checks that start and stop the
+       * lacp process would never match again.
+       */
+      lm->lacp_int--;
+      if (lm->lacp_int == 0)
+       {
+         vlib_process_signal_event (vm, lm->lacp_process_node_index,
+                                    LACP_PROCESS_EVENT_STOP, 0);
+       }
+    }
+}
+
+/*
+ * Plugin init function.
+ *
+ * Hooks up the binary API, builds the ethernet LACPDU and marker PDU
+ * packet templates (everything except the source MAC and the per-port
+ * actor/partner information, which are filled in at send time), and
+ * registers lacp_interface_enable_disable () with the bonding driver.
+ *
+ * Always returns 0; the result of lacp_plugin_api_hookup () is ignored.
+ */
+static clib_error_t *
+lacp_periodic_init (vlib_main_t * vm)
+{
+  lacp_main_t *lm = &lacp_main;
+  ethernet_lacp_pdu_t h;
+  ethernet_marker_pdu_t m;
+  /* destination MAC shared by both templates */
+  u8 dst[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
+
+  /* initialize binary API */
+  lacp_plugin_api_hookup (vm);
+
+  /* Create the ethernet lacp packet template */
+
+  memset (&h, 0, sizeof (h));
+
+  memcpy (h.ethernet.dst_address, dst, sizeof (h.ethernet.dst_address));
+
+  /* leave src address blank (fill in at send time) */
+
+  h.ethernet.type = htons (ETHERNET_TYPE_SLOW_PROTOCOLS);
+
+  h.lacp.subtype = LACP_SUBTYPE;
+  h.lacp.version_number = LACP_ACTOR_LACP_VERSION;
+
+  /* Actor TLV */
+  h.lacp.actor.tlv_type = LACP_ACTOR_INFORMATION;
+  h.lacp.actor.tlv_length = sizeof (lacp_actor_partner_t);
+
+  /* Partner TLV */
+  h.lacp.partner.tlv_type = LACP_PARTNER_INFORMATION;
+  h.lacp.partner.tlv_length = sizeof (lacp_actor_partner_t);
+
+  /* Collector TLV */
+  h.lacp.collector.tlv_type = LACP_COLLECTOR_INFORMATION;
+  h.lacp.collector.tlv_length = sizeof (lacp_collector_t);
+  h.lacp.collector.max_delay = 0;
+
+  /* Terminator TLV */
+  h.lacp.terminator.tlv_type = LACP_TERMINATOR_INFORMATION;
+  h.lacp.terminator.tlv_length = 0;
+
+  vlib_packet_template_init
+    (vm, &lm->packet_templates[LACP_PACKET_TEMPLATE_ETHERNET],
+     /* data */ &h,
+     sizeof (h),
+     /* alloc chunk size */ 8,
+     "lacp-ethernet");
+
+  /* Create the ethernet marker protocol packet template */
+
+  memset (&m, 0, sizeof (m));
+
+  memcpy (m.ethernet.dst_address, dst, sizeof (m.ethernet.dst_address));
+
+  /* leave src address blank (fill in at send time) */
+
+  m.ethernet.type = htons (ETHERNET_TYPE_SLOW_PROTOCOLS);
+
+  m.marker.subtype = MARKER_SUBTYPE;
+  m.marker.version_number = MARKER_PROTOCOL_VERSION;
+
+  /*
+   * Marker information TLV: only the length is set here; tlv_type stays
+   * zero from the memset above.
+   * NOTE(review): presumably the type is filled in by the sender - confirm.
+   */
+  m.marker.marker_info.tlv_length = sizeof (marker_information_t);
+
+  /* Terminator TLV */
+  m.marker.terminator.tlv_type = MARKER_TERMINATOR_INFORMATION;
+  m.marker.terminator.tlv_length = 0;
+
+  vlib_packet_template_init
+    (vm, &lm->marker_packet_templates[MARKER_PACKET_TEMPLATE_ETHERNET],
+     /* data */ &m,
+     sizeof (m),
+     /* alloc chunk size */ 8,
+     "marker-ethernet");
+
+  /* Let the bonding driver tell us when LACP slaves come and go */
+  bond_register_callback (lacp_interface_enable_disable);
+
+  return 0;
+}
+
+/*
+ * Feed one event into a LACP state machine.
+ *
+ * Looks up the transition for (*state, event) in the machine's tables,
+ * stores the transition's next state in *state, then runs the
+ * transition's action, if any.
+ *
+ * Returns the action's return value, or 0 when there is no action.
+ */
+int
+lacp_machine_dispatch (lacp_machine_t * machine, vlib_main_t * vm,
+                      slave_if_t * sif, int event, int *state)
+{
+  lacp_fsm_state_t *transition =
+    &machine->tables[*state].state_table[event];
+
+  LACP_DBG2 (sif, event, *state, machine, transition);
+
+  /* The new state is committed before the action runs */
+  *state = transition->next_state;
+
+  if (transition->action == 0)
+    return 0;
+
+  return (*transition->action) ((void *) vm, (void *) sif);
+}
+
+/*
+ * Reset the LACP per-port state of a slave to its initial values.
+ *
+ * Stops all timers, clears the selection/ready flags, and rebuilds the
+ * actor and partner port information, which is then copied into the
+ * admin copies (actor_admin / partner_admin).
+ *
+ * @param sif          slave interface to initialize
+ * @param hw_address   6-byte address stored as the actor system
+ * @param port_number  actor port number, host byte order
+ * @param group        value used as actor and partner key, host byte order
+ *
+ * The 16-bit port information fields are stored with htons ().
+ */
+void
+lacp_init_neighbor (slave_if_t * sif, u8 * hw_address, u16 port_number,
+                   u32 group)
+{
+  lacp_stop_timer (&sif->wait_while_timer);
+  lacp_stop_timer (&sif->current_while_timer);
+  lacp_stop_timer (&sif->actor_churn_timer);
+  lacp_stop_timer (&sif->partner_churn_timer);
+  lacp_stop_timer (&sif->periodic_timer);
+  /* last_lacpdu_time is reset through the same helper as the timers */
+  lacp_stop_timer (&sif->last_lacpdu_time);
+  sif->lacp_enabled = 1;
+  sif->loopback_port = 0;
+  sif->ready = 0;
+  sif->ready_n = 0;
+  sif->port_moved = 0;
+  sif->ntt = 0;
+  sif->selected = LACP_PORT_UNSELECTED;
+
+  /* Actor state bits depend on the slave's timeout and passive settings */
+  sif->actor.state = LACP_STATE_AGGREGATION;
+  if (sif->ttl_in_seconds == LACP_SHORT_TIMOUT_TIME)
+    sif->actor.state |= LACP_STATE_LACP_TIMEOUT;
+  if (sif->is_passive == 0)
+    sif->actor.state |= LACP_STATE_LACP_ACTIVITY;
+  clib_memcpy (sif->actor.system, hw_address, 6);
+  sif->actor.system_priority = htons (LACP_DEFAULT_SYSTEM_PRIORITY);
+  sif->actor.key = htons (group);
+  sif->actor.port_number = htons (port_number);
+  sif->actor.port_priority = htons (LACP_DEFAULT_PORT_PRIORITY);
+
+  /* Partner defaults mirror the actor's key/port values */
+  sif->partner.system_priority = htons (LACP_DEFAULT_SYSTEM_PRIORITY);
+  sif->partner.key = htons (group);
+  sif->partner.port_number = htons (port_number);
+  sif->partner.port_priority = htons (LACP_DEFAULT_PORT_PRIORITY);
+  sif->partner.state = LACP_STATE_LACP_ACTIVITY;
+
+  /* Keep admin copies of the freshly initialized values */
+  sif->actor_admin = sif->actor;
+  sif->partner_admin = sif->partner;
+}
+
+/*
+ * Initialize the four LACP state machines of a slave: tx, mux,
+ * periodic tx (ptx) and rx, in that order.
+ */
+void
+lacp_init_state_machines (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_init_tx_machine (vm, sif);
+  lacp_init_mux_machine (vm, sif);
+  lacp_init_ptx_machine (vm, sif);
+  lacp_init_rx_machine (vm, sif);
+}
+
+/* Register lacp_periodic_init () as a vlib init function */
+VLIB_INIT_FUNCTION (lacp_periodic_init);
+
+/*
+ * Admin up/down callback for software interfaces.
+ *
+ * For a bond slave, records the admin state in sif->port_enabled. When
+ * the port goes admin down and LACP is enabled on it, the state machines
+ * and the neighbor state are re-initialized from the admin values.
+ * Interfaces that are not bond slaves are ignored. Always returns 0.
+ */
+static clib_error_t *
+lacp_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  slave_if_t *sif = bond_get_slave_by_sw_if_index (sw_if_index);
+
+  /* Not a bond slave */
+  if (sif == 0)
+    return 0;
+
+  sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  /* Only an admin-down LACP port needs to be reset */
+  if (sif->port_enabled != 0 || !sif->lacp_enabled)
+    return 0;
+
+  lacp_init_state_machines (lacp_main.vlib_main, sif);
+  lacp_init_neighbor (sif, sif->actor_admin.system,
+                     ntohs (sif->actor_admin.port_number),
+                     ntohs (sif->actor_admin.key));
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lacp_sw_interface_up_down);
+
+/*
+ * Link up/down callback for hardware interfaces.
+ *
+ * When the link of a bond slave goes down and LACP is enabled on it, the
+ * state machines and the neighbor state are re-initialized from the
+ * admin values. Interfaces that are not bond slaves are ignored.
+ * Always returns 0.
+ *
+ * @param vnm          vnet main
+ * @param hw_if_index  index of the hardware interface whose link changed
+ * @param flags        hardware interface flags; only
+ *                     VNET_HW_INTERFACE_FLAG_LINK_UP is examined
+ */
+static clib_error_t *
+lacp_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif;
+  vnet_hw_interface_t *hw;
+  vlib_main_t *vm = lm->vlib_main;
+
+  /*
+   * hw_if_index indexes the hardware interface pool, not the software
+   * interface pool: resolve the hardware interface first and use its
+   * sw_if_index to find the slave.
+   */
+  hw = vnet_get_hw_interface (vnm, hw_if_index);
+  sif = bond_get_slave_by_sw_if_index (hw->sw_if_index);
+  if (sif)
+    {
+      if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+       {
+         if (sif->lacp_enabled)
+           {
+             lacp_init_state_machines (vm, sif);
+             lacp_init_neighbor (sif, sif->actor_admin.system,
+                                 ntohs (sif->actor_admin.port_number),
+                                 ntohs (sif->actor_admin.key));
+           }
+       }
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lacp_hw_interface_up_down);
+
+/* Plugin registration record: build version and description string */
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+    .version = VPP_BUILD_VER,
+    .description = "Link Aggregation Control Protocol",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/lacp_all_api_h.h b/src/plugins/lacp/lacp_all_api_h.h
new file mode 100644 (file)
index 0000000..188c8fd
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * lacp_all_api_h.h - plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <lacp/lacp.api.h>
diff --git a/src/plugins/lacp/lacp_api.c b/src/plugins/lacp/lacp_api.c
new file mode 100644 (file)
index 0000000..129c360
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ *------------------------------------------------------------------
+ * lacp_api.c - lacp api
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vlib/unix/unix.h>
+#include <lacp/node.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+
+/* define message IDs */
+#include <lacp/lacp_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <lacp/lacp_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <lacp/lacp_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <lacp/lacp_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lacp/lacp_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ * lm - pointer to lacp_main_t (supplies msg_id_base)
+ *
+ * The macros return from the calling function when the client has no
+ * input queue, so they may only be used in functions returning void.
+ *
+ * They expand to a single do { } while (0) statement without a trailing
+ * semicolon; the caller supplies it, which keeps them safe to use as
+ * the body of an unbraced if / else.
+ */
+#define REPLY_MACRO(t)                                          \
+do {                                                            \
+    svm_queue_t * q =                                           \
+    vl_api_client_index_to_input_queue (mp->client_index);      \
+    if (!q)                                                     \
+        return;                                                 \
+                                                                \
+    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
+    rmp->_vl_msg_id = htons ((t)+lm->msg_id_base);              \
+    rmp->context = mp->context;                                 \
+    rmp->retval = htonl (rv);                                   \
+                                                                \
+    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
+} while(0)
+
+/* Same as REPLY_MACRO, with "body" executed just before the send */
+#define REPLY_MACRO2(t, body)                                   \
+do {                                                            \
+    svm_queue_t * q =                                           \
+    vl_api_client_index_to_input_queue (mp->client_index);      \
+    if (!q)                                                     \
+        return;                                                 \
+                                                                \
+    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
+    rmp->_vl_msg_id = htons ((t)+lm->msg_id_base);              \
+    rmp->context = mp->context;                                 \
+    rmp->retval = htonl (rv);                                   \
+    do {body;} while (0);                                       \
+    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
+} while(0)
+
+/* List of (MESSAGE_ID, handler base name) pairs served by this plugin */
+#define foreach_lacp_plugin_api_msg                            \
+_(SW_INTERFACE_LACP_DUMP, sw_interface_lacp_dump)
+
+/*
+ * Build one sw_interface_lacp_details message from a dumped interface
+ * record and send it to the client registration.
+ *
+ * sw_if_index and the four machine states are converted with htonl ();
+ * the actor/partner fields are copied unchanged. Interface names are
+ * copied up to one byte less than the destination size, so the zeroed
+ * message keeps them NUL-terminated. context is stored as given.
+ */
+static void
+lacp_send_sw_interface_details (vl_api_registration_t * reg,
+                               lacp_interface_details_t * lacp_if,
+                               u32 context)
+{
+  vl_api_sw_interface_lacp_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
+
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id =
+    htons (VL_API_SW_INTERFACE_LACP_DETAILS + lacp_main.msg_id_base);
+  mp->context = context;
+  mp->sw_if_index = htonl (lacp_if->sw_if_index);
+
+  /* State machine states */
+  mp->rx_state = htonl (lacp_if->rx_state);
+  mp->tx_state = htonl (lacp_if->tx_state);
+  mp->mux_state = htonl (lacp_if->mux_state);
+  mp->ptx_state = htonl (lacp_if->ptx_state);
+
+  /* Actor: these fields are in network order already */
+  clib_memcpy (mp->actor_system, lacp_if->actor_system, 6);
+  mp->actor_system_priority = lacp_if->actor_system_priority;
+  mp->actor_key = lacp_if->actor_key;
+  mp->actor_port_priority = lacp_if->actor_port_priority;
+  mp->actor_port_number = lacp_if->actor_port_number;
+  mp->actor_state = lacp_if->actor_state;
+
+  /* Partner: these fields are in network order already */
+  clib_memcpy (mp->partner_system, lacp_if->partner_system, 6);
+  mp->partner_system_priority = lacp_if->partner_system_priority;
+  mp->partner_key = lacp_if->partner_key;
+  mp->partner_port_priority = lacp_if->partner_port_priority;
+  mp->partner_port_number = lacp_if->partner_port_number;
+  mp->partner_state = lacp_if->partner_state;
+
+  /* Names, truncated to leave room for the terminating NUL */
+  clib_memcpy (mp->interface_name, lacp_if->interface_name,
+              MIN (ARRAY_LEN (mp->interface_name) - 1,
+                   strlen ((const char *) lacp_if->interface_name)));
+  clib_memcpy (mp->bond_interface_name, lacp_if->bond_interface_name,
+              MIN (ARRAY_LEN (mp->bond_interface_name) - 1,
+                   strlen ((const char *) lacp_if->bond_interface_name)));
+
+  vl_api_send_msg (reg, (u8 *) mp);
+}
+
+/**
+ * @brief Message handler for the sw_interface_lacp_dump API.
+ *
+ * Sends one sw_interface_lacp_details message per entry returned by
+ * lacp_dump_ifs (). Nothing is sent when the client registration cannot
+ * be found or lacp_dump_ifs () reports an error.
+ *
+ * @param mp vl_api_sw_interface_lacp_dump_t * mp the api message
+ */
+void
+vl_api_sw_interface_lacp_dump_t_handler (vl_api_sw_interface_lacp_dump_t * mp)
+{
+  lacp_interface_details_t *details = NULL;
+  lacp_interface_details_t *entry = NULL;
+  vl_api_registration_t *reg =
+    vl_api_client_index_to_registration (mp->client_index);
+
+  /* The interfaces are only dumped once the client is known */
+  if (!reg || lacp_dump_ifs (&details) != 0)
+    return;
+
+  vec_foreach (entry, details)
+    lacp_send_sw_interface_details (reg, entry, mp->context);
+
+  vec_free (details);
+}
+
+#define vl_msg_name_crc_list
+#include <lacp/lacp_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register the "<name>_<crc>" string of every message listed in
+ * foreach_vl_msg_name_crc_lacp with the API main, using the message id
+ * offset by lm->msg_id_base.
+ */
+static void
+setup_message_id_table (lacp_main_t * lm, api_main_t * am)
+{
+  /* Expanded once per (id, name, crc) entry of the list below */
+#define _(id,n,crc) \
+  vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lm->msg_id_base);
+  foreach_vl_msg_name_crc_lacp;
+#undef _
+}
+
+/*
+ * Set up the API message handling tables.
+ *
+ * Reserves a block of message ids under the name "lacp_<api version>",
+ * installs the handler, endian and print functions for every message in
+ * foreach_lacp_plugin_api_msg, and registers the name/crc table.
+ * Always returns 0.
+ */
+clib_error_t *
+lacp_plugin_api_hookup (vlib_main_t * vm)
+{
+  lacp_main_t *lm = &lacp_main;
+  /* Construct the API name (NUL-terminated via the trailing %c) */
+  u8 *api_name = format (0, "lacp_%08x%c", api_version, 0);
+
+  /* Ask for a correctly-sized block of API message decode slots */
+  lm->msg_id_base =
+    vl_msg_api_get_msg_ids ((char *) api_name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n)                                          \
+  vl_msg_api_set_handlers ((VL_API_##N + lm->msg_id_base), \
+                           #n,                          \
+                           vl_api_##n##_t_handler,      \
+                           vl_noop_handler,             \
+                           vl_api_##n##_t_endian,       \
+                           vl_api_##n##_t_print,        \
+                           sizeof (vl_api_##n##_t), 1);
+  foreach_lacp_plugin_api_msg;
+#undef _
+
+  /* Set up the (msg_name, crc, message-id) table */
+  setup_message_id_table (lm, &api_main);
+
+  vec_free (api_name);
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/lacp_doc.md b/src/plugins/lacp/lacp_doc.md
new file mode 100644 (file)
index 0000000..f196e0a
--- /dev/null
@@ -0,0 +1,92 @@
+# VPP Link Aggregation Control Protocol (LACP) implementation    {#lacp_doc}
+
+This document describes the usage of the VPP LACP implementation.
+
+
+## LACP
+
+The Link Aggregation Control Protocol (LACP) is an 802.3ad standard which
+provides a protocol for exchanging information between Partner Systems on a
+link to allow their protocol instances to reach agreement on the Link Aggregation
+Group to which the link belongs and enable transmission and reception for the
+higher layer. Multiple links may be bundled to the same Aggregation Group to form
+a high bandwidth transmission medium and create a fault-tolerant link.
+
+
+### Configuration
+
+1. Create the bond interface
+create bond mode lacp [hw-addr <mac-address>] [load-balance { l2 | l23 | l34 }]
+
+2. Enslave the physical interface to the bond
+enslave interface <interface> to <bond-interface-name> [passive] [long-timeout]
+
+3. Delete the bond interface
+delete bond {<interface> | sw_if_index <sw_idx>}
+
+4. Detach the slave interface from the bond
+detach interface <interface>
+
+### Configuration example
+
+create bond mode lacp
+set interface state BondEthernet0 up
+enslave interface TenGigabitEthernet7/0/0 to BondEthernet0
+enslave interface TenGigabitEthernet7/0/1 to BondEthernet0
+enslave interface TenGigabitEthernet5/0/0 to BondEthernet0
+enslave interface TenGigabitEthernet5/0/1 to BondEthernet0
+
+detach interface TenGigabitEthernet5/0/1
+
+delete bond BondEthernet0
+
+### Operational data
+
+show lacp [<interface>] [details]
+
+Example:
+
+show lacp
+
+
+DBGvpp# sh lacp
+sh lacp
+                                                        actor state                      partner state
+interface name            sw_if_index  bond interface   exp/def/dis/col/syn/agg/tim/act  exp/def/dis/col/syn/agg/tim/act
+GigabitEthernet2/0/1      1            BondEthernet0      0   0   1   1   1   1   1   1    0   0   1   1   1   1   1   1
+  LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet4/0/0   2            BondEthernet1      0   0   1   1   1   1   1   1    0   0   1   1   1   1   0   1
+  LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet4/0/1   3            BondEthernet1      0   0   1   1   1   1   1   1    0   0   1   1   1   1   0   1
+  LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet8/0/1   7            BondEthernet1      0   0   1   1   1   1   1   1    0   0   1   1   1   1   0   1
+  LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet8/0/0   6            BondEthernet1      0   0   1   1   1   1   1   1    0   0   1   1   1   1   0   1
+  LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet6/0/1   5            BondEthernet2      0   0   1   1   1   1   1   1    0   0   1   1   1   1   1   1
+  LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+TenGigabitEthernet6/0/0   4            BondEthernet2      0   0   1   1   1   1   1   1    0   0   1   1   1   1   1   1
+  LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)]
+  RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+DBGvpp#
+
+show bond [details]
+
+
+DBGvpp# sh bond
+sh bond
+interface name   sw_if_index   mode         load balance  active slaves  slaves
+BondEthernet0    10            lacp         l2            1              1
+BondEthernet1    11            lacp         l34           4              4
+BondEthernet2    12            lacp         l23           2              2
+DBGvpp#
+
+### Debugging
+
+debug lacp [<interface>] <on | off>
\ No newline at end of file
diff --git a/src/plugins/lacp/lacp_msg_enum.h b/src/plugins/lacp/lacp_msg_enum.h
new file mode 100644 (file)
index 0000000..138683f
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * lacp_msg_enum.h - vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_lacp_msg_enum_h
+#define included_lacp_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+/*
+ * Build the plugin's message id enum: with vl_msg_id defined as below,
+ * including lacp_all_api_h.h contributes one enumerator per message.
+ * NOTE(review): presumably these ids are plugin-relative and offset by
+ * msg_id_base at run time - confirm against the API hookup code.
+ */
+#define vl_msg_id(n,h) n,
+typedef enum
+{
+#include <lacp/lacp_all_api_h.h>
+  /* We'll want to know how many messages IDs we need... */
+  VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_lacp_msg_enum_h */
diff --git a/src/plugins/lacp/lacp_test.c b/src/plugins/lacp/lacp_test.c
new file mode 100644 (file)
index 0000000..0a8631d
--- /dev/null
@@ -0,0 +1,231 @@
+/*
+ * lacp VAT support
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vppinfra/error.h>
+#include <lacp/node.h>
+
+#define __plugin_msg_base lacp_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* declare message IDs */
+#include <lacp/lacp_msg_enum.h>
+
+/* Get CRC codes of the messages defined outside of this plugin */
+#define vl_msg_name_crc_list
+#include <vpp/api/vpe_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/* define message structures */
+#define vl_typedefs
+#include <vpp/api/vpe_all_api_h.h>
+#include <lacp/lacp_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun           /* define message structures */
+#include <lacp/lacp_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <lacp/lacp_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lacp/lacp_all_api_h.h>
+#undef vl_api_version
+
+/* State of the LACP VAT (API test) plugin */
+typedef struct
+{
+  /* API message ID base */
+  u16 msg_id_base;
+  /* Message index of control-ping, looked up in vat_plugin_register () */
+  u32 ping_id;
+  /* VAT main structure handed to vat_plugin_register () */
+  vat_main_t *vat_main;
+} lacp_test_main_t;
+
+/* Single global instance of the plugin state */
+lacp_test_main_t lacp_test_main;
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg                       \
+_(SW_INTERFACE_LACP_DETAILS, sw_interface_lacp_details)
+
+/*
+ * lacp-dump API reply handler.
+ * Prints one sw_interface_lacp_details message as three lines on the VAT
+ * output stream: the actor/partner state flags (bit 7 first, bit 0 last),
+ * the LAG ID, and the four state machine states.
+ */
+static void vl_api_sw_interface_lacp_details_t_handler
+  (vl_api_sw_interface_lacp_details_t * mp)
+{
+  vat_main_t *vat = &vat_main;
+  /* lacp_bit_test () takes the state as u8, so cache it in that type */
+  u8 actor = mp->actor_state;
+  u8 partner = mp->partner_state;
+
+  /* Line 1: names, sw_if_index and the individual state bits */
+  fformat (vat->ofp,
+          "%-25s %-12d %-16s %3x %3x %3x %3x %3x %3x %3x %3x "
+          "%4x %3x %3x %3x %3x %3x %3x %3x\n",
+          mp->interface_name, ntohl (mp->sw_if_index),
+          mp->bond_interface_name,
+          lacp_bit_test (actor, 7), lacp_bit_test (actor, 6),
+          lacp_bit_test (actor, 5), lacp_bit_test (actor, 4),
+          lacp_bit_test (actor, 3), lacp_bit_test (actor, 2),
+          lacp_bit_test (actor, 1), lacp_bit_test (actor, 0),
+          lacp_bit_test (partner, 7), lacp_bit_test (partner, 6),
+          lacp_bit_test (partner, 5), lacp_bit_test (partner, 4),
+          lacp_bit_test (partner, 3), lacp_bit_test (partner, 2),
+          lacp_bit_test (partner, 1), lacp_bit_test (partner, 0));
+
+  /* Line 2: (priority, system MAC, key, port priority, port number) x 2 */
+  fformat (vat->ofp,
+          "  LAG ID: [(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x), "
+          "(%04x,%02x-%02x-%02x-%02x-%02x-%02x,%04x,%04x,%04x)]\n",
+          ntohs (mp->actor_system_priority),
+          mp->actor_system[0], mp->actor_system[1], mp->actor_system[2],
+          mp->actor_system[3], mp->actor_system[4], mp->actor_system[5],
+          ntohs (mp->actor_key),
+          ntohs (mp->actor_port_priority),
+          ntohs (mp->actor_port_number),
+          ntohs (mp->partner_system_priority),
+          mp->partner_system[0], mp->partner_system[1],
+          mp->partner_system[2], mp->partner_system[3],
+          mp->partner_system[4], mp->partner_system[5],
+          ntohs (mp->partner_key),
+          ntohs (mp->partner_port_priority),
+          ntohs (mp->partner_port_number));
+
+  /* Line 3: symbolic names of the RX / TX / MUX / PTX machine states */
+  fformat (vat->ofp,
+          "  RX-state: %U, TX-state: %U, MUX-state: %U, PTX-state: %U\n",
+          format_rx_sm_state, ntohl (mp->rx_state),
+          format_tx_sm_state, ntohl (mp->tx_state),
+          format_mux_sm_state, ntohl (mp->mux_state),
+          format_ptx_sm_state, ntohl (mp->ptx_state));
+}
+
+/*
+ * VAT command sw_interface_lacp_dump.
+ * Prints the table header, sends the dump request, then sends a control
+ * ping and waits for the result.
+ * Returns -99 when JSON output is requested (not supported), otherwise the
+ * value stored in ret by the W () macro.
+ */
+static int
+api_sw_interface_lacp_dump (vat_main_t * vam)
+{
+  lacp_test_main_t *lm = &lacp_test_main;
+  vl_api_sw_interface_lacp_dump_t *mp;
+  vl_api_control_ping_t *mp_ping;
+  int ret;
+
+  if (vam->json_output)
+    {
+      clib_warning ("JSON output not supported for sw_interface_lacp_dump");
+      return -99;
+    }
+
+  /* Two header rows; the flag columns match the bit 7..0 order used by
+     the details handler */
+  fformat (vam->ofp, "%-55s %-32s %-32s\n", " ", "actor state",
+          "partner state");
+  fformat (vam->ofp, "%-25s %-12s %-16s %-31s  %-31s\n", "interface name",
+          "sw_if_index", "bond interface", "exp/def/dis/col/syn/agg/tim/act",
+          "exp/def/dis/col/syn/agg/tim/act");
+
+  /* Get list of lacp interfaces */
+  /* NOTE(review): M/S/W come from vat_helper_macros.h and use the locals
+     by name; presumably allocate / send / wait -- confirm there */
+  M (SW_INTERFACE_LACP_DUMP, mp);
+  S (mp);
+
+  /* Use a control ping for synchronization */
+  mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+  mp_ping->_vl_msg_id = htons (lm->ping_id);
+  mp_ping->client_index = vam->my_client_index;
+
+  /* Diagnostic output: shows which message index is used for the ping */
+  fformat (vam->ofp, "Sending ping id=%d\n", lm->ping_id);
+
+  vam->result_ready = 0;
+  S (mp_ping);
+
+  W (ret);
+  return ret;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg                                      \
+_(sw_interface_lacp_dump, "")
+
+/*
+ * Register this plugin's API pieces with VAT:
+ *  - a reply handler for every entry of foreach_vpe_api_reply_msg,
+ *  - a command function and a help string for every entry of
+ *    foreach_vpe_api_msg.
+ */
+static void
+lacp_vat_api_hookup (vat_main_t * vam)
+{
+  lacp_test_main_t *lm __attribute__ ((unused)) = &lacp_test_main;
+  /* Hook up handlers for replies from the data plane plug-in */
+  /* Message IDs are the enum value offset by the plugin's msg_id_base */
+#define _(N,n)                                                  \
+  vl_msg_api_set_handlers((VL_API_##N + lm->msg_id_base),       \
+                          #n,                                   \
+                          vl_api_##n##_t_handler,               \
+                          vl_noop_handler,                      \
+                          vl_api_##n##_t_endian,                \
+                          vl_api_##n##_t_print,                 \
+                          sizeof(vl_api_##n##_t), 1);
+  foreach_vpe_api_reply_msg;
+#undef _
+
+  /* API messages we can send */
+  /* Maps the command name to its api_<name> function */
+#define _(n,h)                                          \
+  hash_set_mem (vam->function_by_name, #n, api_##n);
+  foreach_vpe_api_msg;
+#undef _
+
+  /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+  foreach_vpe_api_msg;
+#undef _
+}
+
+/*
+ * VAT plugin entry point.
+ * Stores the vat_main pointer, asks for the plugin's first message ID using
+ * the name "lacp_<api_version>", looks up the control-ping message index and,
+ * if a message ID base was obtained, hooks up handlers and commands.
+ * Always returns 0.
+ */
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+  lacp_test_main_t *lm = &lacp_test_main;
+  u8 *name;
+
+  lm->vat_main = vam;
+
+  /* Ask the vpp engine for the first assigned message-id */
+  /* The trailing %c with 0 NUL-terminates the vector for the (char *) cast */
+  name = format (0, "lacp_%08x%c", api_version, 0);
+  lm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+  /* Get the control ping ID */
+  /* Declares one "<name>_<crc>" string per message of the list; only
+     VL_API_CONTROL_PING_CRC is used below */
+#define _(id,n,crc) \
+  const char *id ## _CRC __attribute__ ((unused)) = #n "_" #crc;
+  foreach_vl_msg_name_crc_vpe;
+#undef _
+  lm->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+
+  /* (u16) ~0 means no message ID base was obtained: skip the hookup */
+  if (lm->msg_id_base != (u16) ~ 0)
+    lacp_vat_api_hookup (vam);
+
+  vec_free (name);
+
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/machine.h b/src/plugins/lacp/machine.h
new file mode 100644 (file)
index 0000000..0590b6c
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef        __LACP_MACHINE_H__
+#define        __LACP_MACHINE_H__
+
+#include <stdint.h>
+
+/* Null action pointer, for transitions that run no routine */
+#define LACP_NOACTION  ((int (*)(void *, void *))0)
+/* Cast an action routine to the generic action signature */
+#define LACP_ACTION_ROUTINE(rtn) ((int(*)(void *, void *))rtn)
+
+/*
+ * Generic state machine action. The mux machine actions receive a
+ * vlib_main_t * as first and a slave_if_t * as second argument.
+ */
+typedef int (*action_func) (void *, void *);
+
+/* One transition: the action to run and the state to enter */
+typedef struct
+{
+  action_func action;
+  int next_state;
+} lacp_fsm_state_t;
+
+/* Debug hook describing a transition taken for a slave interface */
+typedef void (*debug_func) (slave_if_t * sif, int event, int state,
+                           lacp_fsm_state_t * transition);
+
+/* Transitions of one state; the mux tables are laid out one entry per event */
+typedef struct
+{
+  lacp_fsm_state_t *state_table;
+} lacp_fsm_machine_t;
+
+/* A complete machine: its per-state tables plus its debug hook */
+typedef struct
+{
+  lacp_fsm_machine_t *tables;
+  debug_func debug;
+} lacp_machine_t;
+
+/* Feed an event to a machine; state points at the machine's current state */
+extern int lacp_machine_dispatch (lacp_machine_t * machine, vlib_main_t * vm,
+                                 slave_if_t * sif, int event, int *state);
+
+#endif /* __LACP_MACHINE_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/mux_machine.c b/src/plugins/lacp/mux_machine.c
new file mode 100644 (file)
index 0000000..f33c264
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <vlib/vlib.h>
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Mux machine transition tables.
+ * There is one table per state; entry N of a table is the {action, next
+ * state} pair for event number N (see the per-line comments).
+ *
+ *  LACP State = DETACHED
+ */
+static lacp_fsm_state_t lacp_mux_state_detached[] = {
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 0 BEGIN
+  {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING},       // event 1 SELECTED
+  {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING},       // event 2 STANDBY
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 3 UNSELECTED
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 4 READY
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 5 SYNC
+};
+
+/*
+ *  LACP State = WAITING
+ */
+static lacp_fsm_state_t lacp_mux_state_waiting[] = {
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 0 BEGIN
+  {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING},       // event 1 SELECTED
+  {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING},       // event 2 STANDBY
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 3 UNSELECTED
+  {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED},     // event 4 READY
+  {LACP_ACTION_WAITING, LACP_MUX_STATE_WAITING},       // event 5 SYNC
+};
+
+/*
+ *  LACP State = ATTACHED
+ */
+static lacp_fsm_state_t lacp_mux_state_attached[] = {
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 0 BEGIN
+  {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED},     // event 1 SELECTED
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 2 STANDBY
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 3 UNSELECTED
+  {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED},     // event 4 READY
+  {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING},       // event 5 SYNC
+};
+
+/*
+ *  LACP State = COLLECTING_DISTRIBUTING
+ */
+static lacp_fsm_state_t lacp_mux_state_collecting_distributing[] = {
+  {LACP_ACTION_DETACHED, LACP_MUX_STATE_DETACHED},     // event 0 BEGIN
+  {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING},       // event 1 SELECTED
+  {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING},       // event 2 STANDBY
+  {LACP_ACTION_ATTACHED, LACP_MUX_STATE_ATTACHED},     // event 3 UNSELECTED
+  {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING},       // event 4 READY
+  {LACP_ACTION_COLLECTING_DISTRIBUTING, LACP_MUX_STATE_COLLECTING_DISTRIBUTING},       // event 5 SYNC
+};
+
+/* Per-state tables, listed in the order of the LACP_MUX_STATE_* values */
+static lacp_fsm_machine_t lacp_mux_fsm_table[] = {
+  {lacp_mux_state_detached},
+  {lacp_mux_state_waiting},
+  {lacp_mux_state_attached},
+  {lacp_mux_state_collecting_distributing},
+};
+
+/* The mux machine: its tables plus its debug print hook */
+lacp_machine_t lacp_mux_machine = {
+  lacp_mux_fsm_table,
+  lacp_mux_debug_func,
+};
+
+/*
+ * Detach bookkeeping for a slave: resets both ready indications and clears
+ * the actor's SYNCHRONIZATION state bit. vm is not used.
+ */
+static void
+lacp_detach_mux_from_aggregator (vlib_main_t * vm, slave_if_t * sif)
+{
+  sif->ready_n = 0;
+  sif->ready = 0;
+  sif->actor.state = sif->actor.state & ~LACP_STATE_SYNCHRONIZATION;
+}
+
+/*
+ * Attach bookkeeping for a slave: raises the actor's SYNCHRONIZATION state
+ * bit. vm is not used.
+ */
+static void
+lacp_attach_mux_to_aggregator (vlib_main_t * vm, slave_if_t * sif)
+{
+  sif->actor.state = sif->actor.state | LACP_STATE_SYNCHRONIZATION;
+}
+
+/*
+ * Mux machine action for entering DETACHED.
+ * p1 is the vlib_main_t *, p2 the slave_if_t *. Always returns 0.
+ */
+int
+lacp_mux_action_detached (void *p1, void *p2)
+{
+  vlib_main_t *vm = (vlib_main_t *) p1;
+  slave_if_t *sif = (slave_if_t *) p2;
+
+  /* Clear SYNCHRONIZATION, COLLECTING and DISTRIBUTING on the actor and
+     turn off collecting/distributing in the bonding driver */
+  lacp_detach_mux_from_aggregator (vm, sif);
+  sif->actor.state &= ~LACP_STATE_COLLECTING;
+  bond_disable_collecting_distributing (vm, sif);
+  sif->actor.state &= ~LACP_STATE_DISTRIBUTING;
+  /* Flag "need to transmit" and kick the TX machine */
+  sif->ntt = 1;
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                        &sif->tx_state);
+
+  /* Re-enter the mux machine right away if the port is already
+     SELECTED or STANDBY */
+  if (sif->selected == LACP_PORT_SELECTED)
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                          LACP_MUX_EVENT_SELECTED, &sif->mux_state);
+
+  if (sif->selected == LACP_PORT_STANDBY)
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_STANDBY,
+                          &sif->mux_state);
+
+  return 0;
+}
+
+/*
+ * Mux machine action for entering ATTACHED.
+ * p1 is the vlib_main_t *, p2 the slave_if_t *. Always returns 0.
+ */
+int
+lacp_mux_action_attached (void *p1, void *p2)
+{
+  vlib_main_t *vm = (vlib_main_t *) p1;
+  slave_if_t *sif = (slave_if_t *) p2;
+
+  /* Set SYNCHRONIZATION, clear COLLECTING and DISTRIBUTING on the actor and
+     turn off collecting/distributing in the bonding driver */
+  lacp_attach_mux_to_aggregator (vm, sif);
+  sif->actor.state &= ~LACP_STATE_COLLECTING;
+  bond_disable_collecting_distributing (vm, sif);
+  sif->actor.state &= ~LACP_STATE_DISTRIBUTING;
+  /* Flag "need to transmit" and kick the TX machine */
+  sif->ntt = 1;
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                        &sif->tx_state);
+
+  /* A port that is no longer SELECTED is sent the UNSELECTED event */
+  if ((sif->selected == LACP_PORT_UNSELECTED) ||
+      (sif->selected == LACP_PORT_STANDBY))
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                          LACP_MUX_EVENT_UNSELECTED, &sif->mux_state);
+
+  /* SELECTED and partner in sync: send the SYNC event */
+  if ((sif->selected == LACP_PORT_SELECTED) &&
+      (sif->partner.state & LACP_STATE_SYNCHRONIZATION))
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_SYNC,
+                          &sif->mux_state);
+  return 0;
+}
+
+/*
+ * Mux machine action for entering WAITING.
+ * p1 is the vlib_main_t *, p2 the slave_if_t *. Always returns 0.
+ */
+int
+lacp_mux_action_waiting (void *p1, void *p2)
+{
+  vlib_main_t *vm = (vlib_main_t *) p1;
+  slave_if_t *sif = (slave_if_t *) p2;
+  lacp_main_t *lm = &lacp_main;
+
+  /* Arm the wait-while timer unless it is already running. Note that the
+     timer is started against lm->vlib_main, not the vm passed in */
+  if (!lacp_timer_is_running (sif->wait_while_timer))
+    lacp_start_wait_while_timer (lm->vlib_main, sif,
+                                LACP_AGGREGATE_WAIT_TIME);
+
+  /* SELECTED and ready: send the READY event */
+  if ((sif->selected == LACP_PORT_SELECTED) && sif->ready)
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                          LACP_MUX_EVENT_READY, &sif->mux_state);
+
+  /* UNSELECTED: send the UNSELECTED event */
+  if (sif->selected == LACP_PORT_UNSELECTED)
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                          LACP_MUX_EVENT_UNSELECTED, &sif->mux_state);
+
+  return 0;
+}
+
+/*
+ * Mux machine action for entering COLLECTING_DISTRIBUTING.
+ * p1 is the vlib_main_t *, p2 the slave_if_t *. Always returns 0.
+ */
+int
+lacp_mux_action_collecting_distributing (void *p1, void *p2)
+{
+  vlib_main_t *vm = (vlib_main_t *) p1;
+  slave_if_t *sif = (slave_if_t *) p2;
+
+  /* Advertise sync/collecting/distributing and enable the slave in the
+     bonding driver */
+  sif->actor.state |= LACP_STATE_SYNCHRONIZATION | LACP_STATE_COLLECTING |
+    LACP_STATE_DISTRIBUTING;
+  bond_enable_collecting_distributing (vm, sif);
+  /* Flag "need to transmit" and kick the TX machine */
+  sif->ntt = 1;
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                        &sif->tx_state);
+  /* Not SELECTED any more, or partner out of sync: send UNSELECTED */
+  if ((sif->selected == LACP_PORT_UNSELECTED) ||
+      (sif->selected == LACP_PORT_STANDBY) ||
+      !(sif->partner.state & LACP_STATE_SYNCHRONIZATION))
+    lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                          LACP_MUX_EVENT_UNSELECTED, &sif->mux_state);
+
+
+  return 0;
+}
+
+/*
+ * format () helper: appends the name of a mux machine event to s.
+ * Takes one int argument (the event number). Numbers outside the event
+ * table are printed as "Bad event <n>".
+ */
+static u8 *
+format_mux_event (u8 * s, va_list * args)
+{
+  static lacp_event_struct lacp_mux_event_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_mux_event
+#undef _
+    {.str = NULL}
+  };
+  int e = va_arg (*args, int);
+  lacp_event_struct *event_entry =
+    (lacp_event_struct *) & lacp_mux_event_array;
+  /* Number of real events: the trailing {.str = NULL} sentinel is not one,
+     so it must not be accepted as a valid index */
+  int n_events = (sizeof (lacp_mux_event_array) / sizeof (*event_entry)) - 1;
+
+  if ((e < 0) || (e >= n_events))
+    s = format (s, "Bad event %d", e);
+  else
+    s = format (s, "%s", event_entry[e].str);
+
+  return s;
+}
+
+/*
+ * Debug hook of the mux machine: logs the interface name, the event and
+ * the old and new state of a transition via clib_warning.
+ */
+void
+lacp_mux_debug_func (slave_if_t * sif, int event, int state,
+                    lacp_fsm_state_t * transition)
+{
+  int new_state = transition->next_state;
+
+  clib_warning ("%U-MUX: event %U, old state %U, new state %U",
+               format_vnet_sw_if_index_name, vnet_get_main (),
+               sif->sw_if_index,
+               format_mux_event, event,
+               format_mux_sm_state, state,
+               format_mux_sm_state, new_state);
+}
+
+/* Start the mux machine of a slave by feeding it the BEGIN event */
+void
+lacp_init_mux_machine (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_BEGIN,
+                        &sif->mux_state);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/mux_machine.h b/src/plugins/lacp/mux_machine.h
new file mode 100644 (file)
index 0000000..48e9a0b
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef        __LACP_MUX_MACHINE_H__
+#define        __LACP_MUX_MACHINE_H__
+
+#include <stdint.h>
+#include <lacp/machine.h>
+
+/*
+ * Mux machine events: _(number, NAME, "description").
+ * The number is the index of the event in each mux transition table.
+ */
+#define foreach_lacp_mux_event          \
+  _(0, BEGIN, "begin")                  \
+  _(1, SELECTED, "selected")            \
+  _(2, STANDBY, "standby")              \
+  _(3, UNSELECTED, "unselected")        \
+  _(4, READY, "ready")                  \
+  _(5, SYNC, "sync")
+
+/* LACP_MUX_EVENT_<NAME> = number, generated from the list above */
+typedef enum
+{
+#define _(a, b, c) LACP_MUX_EVENT_##b = (a),
+  foreach_lacp_mux_event
+#undef _
+} lacp_mux_event_t;
+
+/* Mux machine states: _(number, NAME, "description") */
+#define foreach_lacp_mux_sm_state       \
+  _(0, DETACHED, "detached")            \
+  _(1, WAITING, "waiting")              \
+  _(2, ATTACHED, "attached")            \
+  _(3, COLLECTING_DISTRIBUTING, "collecting distributing")
+
+/* LACP_MUX_STATE_<NAME> = number, generated from the list above */
+typedef enum
+{
+#define _(a, b, c) LACP_MUX_STATE_##b = (a),
+  foreach_lacp_mux_sm_state
+#undef _
+} lacp_mux_sm_state_t;
+
+extern lacp_machine_t lacp_mux_machine;
+
+int lacp_mux_action_detached (void *p1, void *p2);
+int lacp_mux_action_attached (void *p1, void *p2);
+int lacp_mux_action_waiting (void *p1, void *p2);
+int lacp_mux_action_collecting_distributing (void *p1, void *p2);
+void lacp_mux_debug_func (slave_if_t * sif, int event, int state,
+                         lacp_fsm_state_t * transition);
+
+/* Mux action routines cast to the generic action signature, for use in
+   the mux transition tables */
+#define LACP_ACTION_DETACHED LACP_ACTION_ROUTINE(lacp_mux_action_detached)
+#define LACP_ACTION_ATTACHED LACP_ACTION_ROUTINE(lacp_mux_action_attached)
+#define LACP_ACTION_WAITING LACP_ACTION_ROUTINE(lacp_mux_action_waiting)
+#define LACP_ACTION_COLLECTING_DISTRIBUTING \
+  LACP_ACTION_ROUTINE(lacp_mux_action_collecting_distributing)
+
+/*
+ * Arm the wait-while timer of a slave: it is set to the current
+ * vlib_time_now () value plus expiration.
+ * The interval is taken as f64 so that a floating-point interval such as
+ * LACP_AGGREGATE_WAIT_TIME is not silently truncated to an 8-bit integer.
+ */
+static inline void
+lacp_start_wait_while_timer (vlib_main_t * vm, slave_if_t * sif,
+                            f64 expiration)
+{
+  sif->wait_while_timer = vlib_time_now (vm) + expiration;
+}
+
+#endif /* __LACP_MUX_MACHINE_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/node.c b/src/plugins/lacp/node.c
new file mode 100644 (file)
index 0000000..8eb7887
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <vnet/bonding/node.h>
+#include <vnet/ethernet/packet.h>
+#include <lacp/node.h>
+
+/*
+ * Table of {bit, name} entries, one per item of foreach_lacp_state_flag,
+ * terminated by an entry whose str is NULL.
+ */
+lacp_state_struct lacp_state_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+  foreach_lacp_state_flag
+#undef _
+  {.str = NULL}
+};
+
+static vlib_node_registration_t lacp_process_node;
+
+/** \file
+
+    2 x LACP graph nodes: an "interior" node to process
+    incoming announcements, and a "process" node to periodically
+    send announcements.
+
+    The interior node is neither pipelined nor dual-looped, because
+    it would be very unusual to see more than one LACP packet in
+    a given input frame. So, it's a very simple / straightforward
+    example.
+*/
+
+/*
+ * packet counter strings
+ * Dump these counters via the "show error" CLI command
+ * One string per foreach_lacp_error entry, in the same order as the
+ * LACP_ERROR_* values.
+ */
+static char *lacp_error_strings[] = {
+#define _(sym,string) string,
+  foreach_lacp_error
+#undef _
+};
+
+/*
+ * We actually send all lacp pkts to the "error-drop" node after scanning
+ * them, so the graph node has only one next-index. The "error-drop"
+ * node automatically bumps our per-node packet counters for us.
+ */
+typedef enum
+{
+  LACP_INPUT_NEXT_NORMAL,      /* the only next node: "error-drop" */
+  LACP_INPUT_N_NEXT,           /* number of next nodes */
+} lacp_next_t;
+
+/*
+ * lacp-input node function.
+ * Hands every buffer of the frame to lacp_input (), stores the matching
+ * error counter on the buffer, captures a trace record while the node's
+ * trace count is non-zero, and enqueues the buffer to the single next node.
+ * A frame is expected to hold one packet. Returns the frame's buffer count.
+ */
+static uword
+lacp_node_fn (vlib_main_t * vm,
+             vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *buffers = vlib_frame_vector_args (frame);       /* buffer indices */
+  u32 n_buffers = frame->n_vectors;
+  uword traces_left = vlib_get_trace_count (vm, node);
+  u32 i;
+
+  for (i = 0; i < n_buffers; i++)
+    {
+      const u32 next = LACP_INPUT_NEXT_NORMAL;
+      u32 bi = buffers[i];
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+      u32 error;
+
+      /* scan the pkt; the result is the index of the counter to bump */
+      error = lacp_input (vm, b, bi);
+      b->error = node->errors[error];
+
+      /* Tracing requested: snapshot the start of the packet */
+      if (PREDICT_FALSE (traces_left > 0))
+       {
+         lacp_input_trace_t *t;
+         int len;
+
+         vlib_trace_buffer (vm, node, next, b, /* follow_chain */ 0);
+         traces_left -= 1;
+         vlib_set_trace_count (vm, node, traces_left);
+         t = vlib_add_trace (vm, node, b, sizeof (*t));
+
+         /* Copy no more than the trace record can hold */
+         if (b->current_length < sizeof (t->pkt))
+           len = b->current_length;
+         else
+           len = sizeof (t->pkt);
+         t->len = len;
+         clib_memcpy (&t->pkt, vlib_buffer_get_current (b), len);
+       }
+
+      /* hand the pkt to the next graph node, which is always error-drop */
+      vlib_set_next_frame_buffer (vm, node, next, bi);
+    }
+
+  return frame->n_vectors;
+}
+
+/*
+ * lacp input graph node declaration
+ * Internal node "lacp-input" running lacp_node_fn, with the LACP error
+ * counters and a single next node, "error-drop".
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lacp_input_node, static) = {
+  .function = lacp_node_fn,
+  .name = "lacp-input",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = LACP_N_ERROR,
+  .error_strings = lacp_error_strings,
+
+  .format_trace = lacp_input_format_trace,
+
+  .n_next_nodes = LACP_INPUT_N_NEXT,
+  .next_nodes = {
+    [LACP_INPUT_NEXT_NORMAL] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * lacp periodic function
+ *
+ * Main loop of the lacp-process node. On startup it records its own node
+ * index in lacp_main and registers lacp-input for the slow-protocols
+ * ethertype. It then sleeps until a START/STOP event arrives; while
+ * enabled it additionally wakes up on a 0.2 poll interval and calls
+ * lacp_periodic () each time that interval has elapsed.
+ * The loop never exits.
+ */
+static uword
+lacp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  lacp_main_t *lm = &lacp_main;
+  f64 poll_time_remaining;
+  uword event_type, *event_data = 0;
+  u8 enabled = 0;
+
+  /* So we can send events to the lacp process */
+  lm->lacp_process_node_index = lacp_process_node.index;
+
+  ethernet_register_input_type (vm, ETHERNET_TYPE_SLOW_PROTOCOLS /* LACP */ ,
+                               lacp_input_node.index);
+
+  poll_time_remaining = 0.2;
+  while (1)
+    {
+      /* Only poll on the clock while at least START has been received */
+      if (enabled)
+       poll_time_remaining =
+         vlib_process_wait_for_event_or_clock (vm, poll_time_remaining);
+      else
+       vlib_process_wait_for_event (vm);
+
+      event_type = vlib_process_get_events (vm, &event_data);
+
+      /*
+       * The event payload is never inspected. Reset the vector before
+       * dispatching so that every path below, including the "continue"
+       * taken on STOP, leaves it empty; otherwise stale entries pile up
+       * because vlib_process_get_events () appends to the vector.
+       */
+      if (event_data)
+       _vec_len (event_data) = 0;
+
+      switch (event_type)
+       {
+       case ~0:                /* no events => timeout */
+         break;
+       case LACP_PROCESS_EVENT_START:
+         enabled = 1;
+         break;
+       case LACP_PROCESS_EVENT_STOP:
+         enabled = 0;
+         /* Go straight back to waiting; no periodic work while stopped */
+         continue;
+       default:
+         clib_warning ("BUG: event type 0x%wx", event_type);
+         break;
+       }
+
+      /* Poll interval used up: run the periodic work and re-arm it */
+      if (vlib_process_suspend_time_is_zero (poll_time_remaining))
+       {
+         lacp_periodic (vm);
+         poll_time_remaining = 0.2;
+       }
+    }
+
+  return 0;
+}
+
+/*
+ * lacp periodic node declaration
+ * Process node "lacp-process" running lacp_process ().
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (lacp_process_node, static) = {
+  .function = lacp_process,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "lacp-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/node.h b/src/plugins/lacp/node.h
new file mode 100644 (file)
index 0000000..26cf7a3
--- /dev/null
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_lacp_node_h__
+#define __included_lacp_node_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <lacp/protocol.h>
+#include <lacp/rx_machine.h>
+#include <lacp/tx_machine.h>
+#include <lacp/mux_machine.h>
+#include <lacp/ptx_machine.h>
+
+/* Indices into lacp_main_t.packet_templates[] */
+typedef enum
+{
+  LACP_PACKET_TEMPLATE_ETHERNET,
+  LACP_N_PACKET_TEMPLATES,
+} lacp_packet_template_id_t;
+
+/* Indices into lacp_main_t.marker_packet_templates[] */
+typedef enum
+{
+  MARKER_PACKET_TEMPLATE_ETHERNET,
+  MARKER_N_PACKET_TEMPLATES,
+} marker_packet_template_id_t;
+
+/*
+ * Event types handled by the lacp-process node.
+ * This must be a typedef: without it the declaration defines a variable
+ * named lacp_process_event_t in every file that includes this header.
+ */
+typedef enum
+{
+  LACP_PROCESS_EVENT_START = 1,
+  LACP_PROCESS_EVENT_STOP = 2,
+} lacp_process_event_t;
+
+/*
+ * Emit a clib_warning when debugging is enabled globally (lacp_main.debug)
+ * or on the given interface n. Macro arguments are parenthesized so that
+ * any pointer expression can be passed.
+ */
+#define LACP_DBG(n, args...)                   \
+  {                                            \
+    lacp_main_t *_lm = &lacp_main;              \
+    if (_lm->debug || (n)->debug)              \
+      clib_warning (args);                     \
+  }
+
+/*
+ * Call machine m's debug hook with (n, e, s, t) when the machine has a
+ * hook and debugging is enabled globally or on interface n.
+ */
+#define LACP_DBG2(n, e, s, m, t)                 \
+  {                                              \
+    lacp_main_t *_lm = &lacp_main;                \
+    if ((m)->debug && (_lm->debug || (n)->debug)) \
+      (*(m)->debug)(n, e, s, t);                 \
+  }
+
+/* Packet counters: _(SYMBOL, "counter description") */
+#define foreach_lacp_error                                               \
+_ (NONE, "good lacp packets -- consumed")                               \
+_ (CACHE_HIT, "good lacp packets -- cache hit")                          \
+_ (UNSUPPORTED, "unsupported slow protocol packets")                     \
+_ (TOO_SMALL, "bad lacp packets -- packet too small")                    \
+_ (BAD_TLV, "bad lacp packets -- bad TLV length")                        \
+_ (DISABLED, "lacp packets received on disabled interfaces")
+
+/* LACP_ERROR_<SYMBOL> counter indices; LACP_N_ERROR is their count */
+typedef enum
+{
+#define _(sym,str) LACP_ERROR_##sym,
+  foreach_lacp_error
+#undef _
+    LACP_N_ERROR,
+} lacp_error_t;
+
+/* lacp packet trace capture */
+typedef struct
+{
+  /* Number of bytes copied into pkt */
+  u32 len;
+  /* Captured start of the packet, viewed as a marker PDU or an LACPDU */
+  union
+  {
+    marker_pdu_t marker;
+    lacp_pdu_t lacpdu;
+  } pkt;
+} lacp_input_trace_t;
+
+/** LACP interface details struct */
+typedef struct
+{
+  u32 sw_if_index;
+  u8 interface_name[64];
+  /* Current state of the four per-interface state machines */
+  u32 rx_state;
+  u32 tx_state;
+  u32 mux_state;
+  u32 ptx_state;
+  u8 bond_interface_name[64];
+  /* Actor (local) side of the LAG ID, plus its state flags */
+  u16 actor_system_priority;
+  u8 actor_system[6];
+  u16 actor_key;
+  u16 actor_port_priority;
+  u16 actor_port_number;
+  u8 actor_state;
+  /* Partner (remote) side of the LAG ID, plus its state flags */
+  u16 partner_system_priority;
+  u8 partner_system[6];
+  u16 partner_key;
+  u16 partner_port_priority;
+  u16 partner_port_number;
+  u8 partner_state;
+} lacp_interface_details_t;
+
+/* Global state of the LACP plugin */
+typedef struct
+{
+  /** API message ID base */
+  u16 msg_id_base;
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+
+  /* Background process node index */
+  u32 lacp_process_node_index;
+
+  /* Packet templates for different encap types */
+  vlib_packet_template_t packet_templates[LACP_N_PACKET_TEMPLATES];
+
+  /* Marker packet templates for different encap types */
+  vlib_packet_template_t marker_packet_templates[MARKER_N_PACKET_TEMPLATES];
+
+  /* LACP interface count */
+  u32 lacp_int;
+
+  /* debug is on or off */
+  u8 debug;
+} lacp_main_t;
+
+extern lacp_state_struct lacp_state_array[];
+extern lacp_main_t lacp_main;
+
+clib_error_t *lacp_plugin_api_hookup (vlib_main_t * vm);
+int lacp_dump_ifs (lacp_interface_details_t ** out_bondids);
+lacp_error_t lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0);
+void lacp_periodic (vlib_main_t * vm);
+u8 *lacp_input_format_trace (u8 * s, va_list * args);
+void lacp_init_neighbor (slave_if_t * sif, u8 * hw_address,
+                        u16 port_number, u32 group);
+void lacp_init_state_machines (vlib_main_t * vm, slave_if_t * sif);
+void lacp_init_rx_machine (vlib_main_t * vm, slave_if_t * sif);
+void lacp_init_tx_machine (vlib_main_t * vm, slave_if_t * sif);
+void lacp_init_ptx_machine (vlib_main_t * vm, slave_if_t * sif);
+void lacp_init_mux_machine (vlib_main_t * vm, slave_if_t * sif);
+void lacp_selection_logic (vlib_main_t * vm, slave_if_t * sif);
+void lacp_send_lacp_pdu (vlib_main_t * vm, slave_if_t * sif);
+
+/* Stop a timer: a timer value of 0.0 means "not running" */
+static inline void
+lacp_stop_timer (f64 * timer)
+{
+  timer[0] = 0.0;
+}
+
+/* Return 1 if the timer is armed (non-zero), 0 if it is stopped */
+static inline u8
+lacp_timer_is_running (f64 timer)
+{
+  if (timer == 0.0)
+    return 0;
+
+  return 1;
+}
+
+/* Return 1 if the current vlib time has reached the timer value, else 0 */
+static inline u8
+lacp_timer_is_expired (vlib_main_t * vm, f64 timer)
+{
+  return (vlib_time_now (vm) >= timer);
+}
+
+/*
+ * format () helper: appends the name of an RX machine state to s.
+ * Takes one int argument (the state number). Numbers outside the state
+ * table are printed as "Bad state <n>".
+ */
+static inline u8 *
+format_rx_sm_state (u8 * s, va_list * args)
+{
+  lacp_state_struct lacp_rx_sm_state_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_rx_sm_state
+#undef _
+    {.str = NULL}
+  };
+  int state = va_arg (*args, int);
+  lacp_state_struct *state_entry =
+    (lacp_state_struct *) & lacp_rx_sm_state_array;
+  /* Number of real states: the trailing {.str = NULL} sentinel is not one,
+     so it must not be accepted as a valid index */
+  int n_states =
+    (sizeof (lacp_rx_sm_state_array) / sizeof (*state_entry)) - 1;
+
+  if ((state < 0) || (state >= n_states))
+    s = format (s, "Bad state %d", state);
+  else
+    s = format (s, "%s", state_entry[state].str);
+
+  return s;
+}
+
+/*
+ * format () helper: appends the name of a TX machine state to s.
+ * Takes one int argument (the state number). Numbers outside the state
+ * table are printed as "Bad state <n>".
+ */
+static inline u8 *
+format_tx_sm_state (u8 * s, va_list * args)
+{
+  lacp_state_struct lacp_tx_sm_state_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_tx_sm_state
+#undef _
+    {.str = NULL}
+  };
+  int state = va_arg (*args, int);
+  lacp_state_struct *state_entry =
+    (lacp_state_struct *) & lacp_tx_sm_state_array;
+  /* Number of real states: the trailing {.str = NULL} sentinel is not one,
+     so it must not be accepted as a valid index */
+  int n_states =
+    (sizeof (lacp_tx_sm_state_array) / sizeof (*state_entry)) - 1;
+
+  if ((state < 0) || (state >= n_states))
+    s = format (s, "Bad state %d", state);
+  else
+    s = format (s, "%s", state_entry[state].str);
+
+  return s;
+}
+
+/*
+ * format () helper: appends the name of a mux machine state to s.
+ * Takes one int argument (the state number). Numbers outside the state
+ * table are printed as "Bad state <n>".
+ */
+static inline u8 *
+format_mux_sm_state (u8 * s, va_list * args)
+{
+  lacp_state_struct lacp_mux_sm_state_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_mux_sm_state
+#undef _
+    {.str = NULL}
+  };
+  int state = va_arg (*args, int);
+  lacp_state_struct *state_entry =
+    (lacp_state_struct *) & lacp_mux_sm_state_array;
+  /* Number of real states: the trailing {.str = NULL} sentinel is not one,
+     so it must not be accepted as a valid index */
+  int n_states =
+    (sizeof (lacp_mux_sm_state_array) / sizeof (*state_entry)) - 1;
+
+  if ((state < 0) || (state >= n_states))
+    s = format (s, "Bad state %d", state);
+  else
+    s = format (s, "%s", state_entry[state].str);
+
+  return s;
+}
+
+/*
+ * format () helper: appends the name of a PTX machine state to s.
+ * Takes one int argument (the state number). Numbers outside the state
+ * table are printed as "Bad state <n>".
+ */
+static inline u8 *
+format_ptx_sm_state (u8 * s, va_list * args)
+{
+  lacp_state_struct lacp_ptx_sm_state_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_ptx_sm_state
+#undef _
+    {.str = NULL}
+  };
+  int state = va_arg (*args, int);
+  lacp_state_struct *state_entry =
+    (lacp_state_struct *) & lacp_ptx_sm_state_array;
+  /* Number of real states: the trailing {.str = NULL} sentinel is not one,
+     so it must not be accepted as a valid index */
+  int n_states =
+    (sizeof (lacp_ptx_sm_state_array) / sizeof (*state_entry)) - 1;
+
+  if ((state < 0) || (state >= n_states))
+    s = format (s, "Bad state %d", state);
+  else
+    s = format (s, "%s", state_entry[state].str);
+
+  return s;
+}
+
+/* Return 1 if bit number bit (0 = least significant) is set in val, else 0 */
+static inline int
+lacp_bit_test (u8 val, u8 bit)
+{
+  return (val & (1 << bit)) != 0;
+}
+
+#endif /* __included_lacp_node_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/protocol.h b/src/plugins/lacp/protocol.h
new file mode 100644 (file)
index 0000000..05a3f04
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_lacp_protocol_h__
+#define __included_lacp_protocol_h__
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/bonding/node.h>
+
+/* Protocol timing constants.
+   NOTE(review): units are not stated in this header; presumably seconds -
+   confirm against the users of these macros. */
+#define LACP_CHURN_DETECTION_TIME       60
+#define LACP_AGGREGATE_WAIT_TIME        2.0
+
+/* Subtype and version numbers for LACP PDUs (presumably the values carried
+   in lacp_pdu_t.subtype and lacp_pdu_t.version_number - confirm). */
+#define LACP_SUBTYPE                    1
+#define LACP_ACTOR_LACP_VERSION         1
+
+/*
+ * X-macro list of TLV types, written as _(name, value). It is expanded
+ * below into the LACP_<name> enumerators of lacp_tlv_t.
+ */
+#define foreach_lacp_tlv        \
+  _ (TERMINATOR_INFORMATION, 0) \
+  _ (ACTOR_INFORMATION, 1)      \
+  _ (PARTNER_INFORMATION , 2)   \
+  _ (COLLECTOR_INFORMATION, 3)
+
+/* TLV type codes: LACP_TERMINATOR_INFORMATION = 0 ...
+   LACP_COLLECTOR_INFORMATION = 3. */
+typedef enum
+{
+#define _(f,n) LACP_##f = (n),
+  foreach_lacp_tlv
+#undef _
+} lacp_tlv_t;
+
+/*
+ * X-macro list of port selection values, written as _(name, value). It is
+ * expanded below into the LACP_PORT_<name> enumerators of lacp_port_t.
+ */
+#define foreach_lacp_port  \
+  _ (UNSELECTED, 0)        \
+  _ (SELECTED, 1)          \
+  _ (STANDBY, 2)
+
+/* Port selection values: LACP_PORT_UNSELECTED = 0, LACP_PORT_SELECTED = 1,
+   LACP_PORT_STANDBY = 2. */
+typedef enum
+{
+#define _(f,n) LACP_PORT_##f = (n),
+  foreach_lacp_port
+#undef _
+} lacp_port_t;
+
+/*
+ * Port state bits, written as _(bit number, name, description).
+ * Expanded below into lacp_state_t, where each enumerator is the mask
+ * (1 << bit number).
+ */
+#define foreach_lacp_state                  \
+  _(0, LACP_ACTIVITY, "activity")           \
+  _(1, LACP_TIMEOUT, "lacp timeout")         \
+  _(2, AGGREGATION, "aggregation")           \
+  _(3, SYNCHRONIZATION, "synchronization")   \
+  _(4, COLLECTING, "collecting")            \
+  _(5, DISTRIBUTING, "distributing")         \
+  _(6, DEFAULTED, "defaulted")               \
+  _(7, EXPIRED, "expired")
+
+/* Bit masks LACP_STATE_<name>, e.g. LACP_STATE_LACP_ACTIVITY = 0x01 and
+   LACP_STATE_EXPIRED = 0x80. */
+typedef enum
+{
+#define _(a, b, c) LACP_STATE_##b = (1 << a),
+  foreach_lacp_state
+#undef _
+} lacp_state_t;
+
+/*
+ * Port state bits again, this time as _(bit number, enumerator, description)
+ * with the second column spelled exactly like the lacp_state_t enumerators.
+ * NOTE(review): the consumer is outside this header; presumably it
+ * stringifies the second column for display - confirm nothing matches on
+ * the old misspelled names.
+ */
+#define foreach_lacp_state_flag                         \
+  _(0, LACP_STATE_LACP_ACTIVITY, "activity")            \
+  _(1, LACP_STATE_LACP_TIMEOUT, "lacp timeout")         \
+  _(2, LACP_STATE_AGGREGATION, "aggregation")           \
+  _(3, LACP_STATE_SYNCHRONIZATION, "synchronization")   \
+  _(4, LACP_STATE_COLLECTING, "collecting")             \
+  _(5, LACP_STATE_DISTRIBUTING, "distributing")         \
+  _(6, LACP_STATE_DEFAULTED, "defaulted")               \
+  _(7, LACP_STATE_EXPIRED, "expired")
+
+/* One row of a state-name lookup table: a number (first column of the
+   corresponding foreach_* list) and the text to print for it. */
+typedef struct
+{
+  u8 bit;
+  char *str;
+} lacp_state_struct;
+
+/* One row of an event-name lookup table; same layout as lacp_state_struct. */
+typedef struct
+{
+  u8 bit;
+  char *str;
+} lacp_event_struct;
+
+/* Transmit limit and default priorities.
+   NOTE(review): presumably LACP_MAX_TX_IN_SECOND caps LACPDUs sent per
+   second and the priorities seed the actor defaults - confirm at the users. */
+#define LACP_MAX_TX_IN_SECOND           3
+#define LACP_DEFAULT_PORT_PRIORITY      0x00ff
+#define LACP_DEFAULT_SYSTEM_PRIORITY    0xffff
+
+/* Actor or partner TLV: type, length, the port information block and three
+   reserved bytes. lacp_port_info_t is declared outside this header. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 tlv_type;
+                    u8 tlv_length;
+                    lacp_port_info_t port_info; u8 reserved[3];
+                    }) lacp_actor_partner_t;
+
+/* Collector TLV: type, length, max_delay and 12 reserved bytes. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 tlv_type; u8 tlv_length; u16 max_delay;
+                    u8 reserved[12];
+                    }) lacp_collector_t;
+
+/* Terminator TLV: type, length and 50 bytes of padding. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 tlv_type; u8 tlv_length;
+                    u8 pad[50];
+                    }) lacp_terminator_t;
+
+/* LACP PDU body: subtype, version, then the actor, partner, collector and
+   terminator TLVs in that order. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 subtype; u8 version_number;
+                    lacp_actor_partner_t actor; lacp_actor_partner_t partner;
+                    lacp_collector_t collector; lacp_terminator_t terminator;
+                    }) lacp_pdu_t;
+
+/* An LACP PDU preceded by its Ethernet header. */
+typedef CLIB_PACKED (struct
+                    {
+                    ethernet_header_t ethernet; lacp_pdu_t lacp;
+                    }) ethernet_lacp_pdu_t;
+
+/* Subtype and version numbers for marker PDUs (presumably the values
+   carried in marker_pdu_t.subtype / version_number - confirm). */
+#define MARKER_SUBTYPE                  2
+#define MARKER_PROTOCOL_VERSION         1
+
+/*
+ * X-macro list of marker TLV types, written as _(name, value). It is
+ * expanded below into the MARKER_<name> enumerators of marker_tlv_t.
+ */
+#define foreach_marker_tlv      \
+  _ (TERMINATOR_INFORMATION, 0) \
+  _ (INFORMATION, 1)            \
+  _ (RESPONSE_INFORMATION , 2)
+
+/* Marker TLV type codes: MARKER_TERMINATOR_INFORMATION = 0,
+   MARKER_INFORMATION = 1, MARKER_RESPONSE_INFORMATION = 2. */
+typedef enum
+{
+#define _(f,n) MARKER_##f = (n),
+  foreach_marker_tlv
+#undef _
+} marker_tlv_t;
+
+/* Marker terminator TLV: type, length and 90 reserved bytes. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 tlv_type; u8 tlv_length;
+                    u8 reserved[90];
+                    }) marker_terminator_t;
+
+/* Marker information TLV: type, length, requester port, 6-byte requester
+   system, requester transaction id and two bytes of padding. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 tlv_type;
+                    u8 tlv_length;
+                    u16 requester_port; u8 requester_system[6];
+                    u32 requester_transaction_id; u8 pad[2];
+                    }) marker_information_t;
+
+/* Marker PDU body: subtype, version, then the information and terminator
+   TLVs in that order. */
+typedef CLIB_PACKED (struct
+                    {
+                    u8 subtype;
+                    u8 version_number;
+                    marker_information_t marker_info;
+                    marker_terminator_t terminator;
+                    }) marker_pdu_t;
+
+/* A marker PDU preceded by its Ethernet header. */
+typedef CLIB_PACKED (struct
+                    {
+                    ethernet_header_t ethernet; marker_pdu_t marker;
+                    }) ethernet_marker_pdu_t;
+
+#endif /* __included_lacp_protocol_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/ptx_machine.c b/src/plugins/lacp/ptx_machine.c
new file mode 100644 (file)
index 0000000..ac83444
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Periodic-transmission (PTX) transition tables, one per state.
+ * Each row is {action, next state}; rows are in event-number order
+ * (BEGIN, LONG_TIMEOUT, TIMER_EXPIRED, SHORT_TIMEOUT), as noted in the
+ * trailing comment on each row.
+ */
+
+/*
+ *  LACP State = NO_PERIODIC
+ */
+static lacp_fsm_state_t lacp_ptx_state_no_periodic[] = {
+  {LACP_ACTION_NO_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC},     // event 0 BEGIN
+  {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 1 LONG_TIMEOUT
+  {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 2 TIMER_EXPIRED
+  {LACP_NOACTION, LACP_PTX_STATE_NO_PERIODIC}, // event 3 SHORT_TIMEOUT
+};
+
+/*
+ *  LACP State = FAST_PERIODIC
+ */
+static lacp_fsm_state_t lacp_ptx_state_fast_periodic[] = {
+  {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC},   // event 0 BEGIN
+  {LACP_ACTION_SLOW_PERIODIC, LACP_PTX_STATE_SLOW_PERIODIC},   // event 1 LONG_TIMEOUT
+  {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX},     // event 2 TIMER_EXPIRED
+  {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC},   // event 3 SHORT_TIMEOUT
+};
+
+/*
+ *  LACP State = SLOW_PERIODIC
+ */
+static lacp_fsm_state_t lacp_ptx_state_slow_periodic[] = {
+  {LACP_ACTION_NO_PERIODIC, LACP_PTX_STATE_NO_PERIODIC},       // event 0 BEGIN
+  {LACP_ACTION_SLOW_PERIODIC, LACP_PTX_STATE_SLOW_PERIODIC},   // event 1 LONG_TIMEOUT
+  {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX},     // event 2 TIMER_EXPIRED
+  {LACP_ACTION_FAST_PERIODIC, LACP_PTX_STATE_FAST_PERIODIC},   // event 3 SHORT_TIMEOUT
+};
+
+/*
+ *  LACP State = PERIODIC_TX
+ */
+static lacp_fsm_state_t lacp_ptx_state_periodic_tx[] = {
+  {LACP_ACTION_NO_PERIODIC, LACP_PTX_STATE_NO_PERIODIC},       // event 0 BEGIN
+  {LACP_NOACTION, LACP_PTX_STATE_PERIODIC_TX}, // event 1 LONG_TIMEOUT
+  {LACP_ACTION_TIMER_EXPIRED, LACP_PTX_STATE_PERIODIC_TX},     // event 2 TIMER_EXPIRED
+  {LACP_NOACTION, LACP_PTX_STATE_PERIODIC_TX}, // event 3 SHORT_TIMEOUT
+};
+
+
+/* Per-state tables listed in state order: NO_PERIODIC, FAST_PERIODIC,
+   SLOW_PERIODIC, PERIODIC_TX. */
+static lacp_fsm_machine_t lacp_ptx_fsm_table[] = {
+  {lacp_ptx_state_no_periodic},
+  {lacp_ptx_state_fast_periodic},
+  {lacp_ptx_state_slow_periodic},
+  {lacp_ptx_state_periodic_tx},
+};
+
+/* The PTX machine handed to lacp_machine_dispatch(): its state tables plus
+   the debug print routine. */
+lacp_machine_t lacp_ptx_machine = {
+  lacp_ptx_fsm_table,
+  lacp_ptx_debug_func,
+};
+
+/*
+ * PTX action: stop the periodic timer, then feed a BEGIN event back into
+ * the PTX machine. p1 is the vlib_main_t, p2 the slave_if_t.
+ * Always returns 0.
+ */
+int
+lacp_ptx_action_no_periodic (void *p1, void *p2)
+{
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+
+  lacp_stop_timer (&sif->periodic_timer);
+  lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, LACP_PTX_EVENT_BEGIN,
+                         &sif->ptx_state);
+  return 0;
+}
+
+/*
+ * PTX action: re-arm the periodic timer with LACP_SLOW_PERIODIC_TIMER.
+ * A TIMER_EXPIRED event is dispatched afterwards when the previous timer
+ * had already run out or the partner asks for the short timeout.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_ptx_action_slow_periodic (void *p1, void *p2)
+{
+  lacp_main_t *lm = &lacp_main;
+  vlib_main_t *vm = p1;
+  slave_if_t *sif = p2;
+  /* Sample the old deadline before it is overwritten below. */
+  u8 timer_expired =
+    (lacp_timer_is_running (sif->periodic_timer) &&
+     lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer)) ? 1 : 0;
+
+  lacp_start_periodic_timer (lm->vlib_main, sif, LACP_SLOW_PERIODIC_TIMER);
+
+  if (!timer_expired && !(sif->partner.state & LACP_STATE_LACP_TIMEOUT))
+    return 0;
+
+  lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                         LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state);
+  return 0;
+}
+
+/*
+ * PTX action: re-arm the periodic timer with LACP_FAST_PERIODIC_TIMER.
+ * Dispatches TIMER_EXPIRED when the previous timer had already run out,
+ * and then LONG_TIMEOUT when the partner's short-timeout bit is clear.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_ptx_action_fast_periodic (void *p1, void *p2)
+{
+  lacp_main_t *lm = &lacp_main;
+  vlib_main_t *vm = p1;
+  slave_if_t *sif = p2;
+  /* Sample the old deadline before it is overwritten below. */
+  u8 timer_expired =
+    (lacp_timer_is_running (sif->periodic_timer) &&
+     lacp_timer_is_expired (lm->vlib_main, sif->periodic_timer)) ? 1 : 0;
+
+  lacp_start_periodic_timer (lm->vlib_main, sif, LACP_FAST_PERIODIC_TIMER);
+
+  if (timer_expired)
+    {
+      lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                             LACP_PTX_EVENT_TIMER_EXPIRED, &sif->ptx_state);
+    }
+
+  /* Read the partner state only after the dispatch above has run. */
+  if ((sif->partner.state & LACP_STATE_LACP_TIMEOUT) == 0)
+    {
+      lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                             LACP_PTX_EVENT_LONG_TIMEOUT, &sif->ptx_state);
+    }
+
+  return 0;
+}
+
+/*
+ * PTX action: flag that a transmission is needed (ntt) and notify the TX
+ * machine, then send the PTX machine SHORT_TIMEOUT or LONG_TIMEOUT
+ * according to the partner's timeout bit.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_ptx_action_timer_expired (void *p1, void *p2)
+{
+  vlib_main_t *vm = p1;
+  slave_if_t *sif = p2;
+  int next_event;
+
+  sif->ntt = 1;
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                         &sif->tx_state);
+
+  /* Chosen after the TX dispatch, matching the order of evaluation. */
+  next_event = (sif->partner.state & LACP_STATE_LACP_TIMEOUT) ?
+    LACP_PTX_EVENT_SHORT_TIMEOUT : LACP_PTX_EVENT_LONG_TIMEOUT;
+  lacp_machine_dispatch (&lacp_ptx_machine, vm, sif, next_event,
+                         &sif->ptx_state);
+
+  return 0;
+}
+
+/*
+ * format() callback: append the name of a PTX machine event to s.
+ *
+ * Consumes one int from args (the event number) and returns the extended
+ * vector. The text printed is the stringified event identifier (second
+ * column of foreach_lacp_ptx_event). Numbers that do not index a real
+ * table entry are rendered as "Bad event <n>".
+ */
+static u8 *
+format_ptx_event (u8 * s, va_list * args)
+{
+  static const lacp_event_struct lacp_ptx_event_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_ptx_event
+#undef _
+    {.str = NULL}
+  };
+  /* The trailing {.str = NULL} sentinel is not a printable event. */
+  const int n_events =
+    (int) (sizeof (lacp_ptx_event_array) /
+           sizeof (lacp_ptx_event_array[0])) - 1;
+  int e = va_arg (*args, int);
+
+  if (e < 0 || e >= n_events)
+    s = format (s, "Bad event %d", e);
+  else
+    s = format (s, "%s", lacp_ptx_event_array[e].str);
+
+  return s;
+}
+
+/*
+ * Debug hook of the PTX machine: log the interface name, the event, the
+ * state the machine was in and the state the transition leads to.
+ */
+void
+lacp_ptx_debug_func (slave_if_t * sif, int event, int state,
+                    lacp_fsm_state_t * transition)
+{
+  clib_warning ("%U-PTX: event %U, old state %U, new state %U",
+                format_vnet_sw_if_index_name, vnet_get_main (),
+                sif->sw_if_index,
+                format_ptx_event, event,
+                format_ptx_sm_state, state,
+                format_ptx_sm_state, transition->next_state);
+}
+
+/* Start the PTX machine for sif by dispatching a BEGIN event to it. */
+void
+lacp_init_ptx_machine (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                         LACP_PTX_EVENT_BEGIN, &sif->ptx_state);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/ptx_machine.h b/src/plugins/lacp/ptx_machine.h
new file mode 100644 (file)
index 0000000..a9af4bb
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef        ___LACP_PTX_MACHINE_H__
+#define        ___LACP_PTX_MACHINE_H__
+
+#include <stdint.h>
+#include <lacp/machine.h>
+
+/*
+ * PTX machine events, written as _(number, name, description). Expanded
+ * below into the LACP_PTX_EVENT_<name> enumerators; the number is also the
+ * row index into each PTX transition table.
+ */
+#define foreach_lacp_ptx_event          \
+  _(0, BEGIN, "begin")                  \
+  _(1, LONG_TIMEOUT, "long timeout")    \
+  _(2, TIMER_EXPIRED, "timer expired")  \
+  _(3, SHORT_TIMEOUT, "short timeout")
+
+typedef enum
+{
+#define _(a, b, c) LACP_PTX_EVENT_##b = (a),
+  foreach_lacp_ptx_event
+#undef _
+} lacp_ptx_event_t;
+
+/*
+ * PTX machine states, written as _(number, name, description). Expanded
+ * below into the LACP_PTX_STATE_<name> enumerators.
+ */
+#define foreach_lacp_ptx_sm_state       \
+  _(0, NO_PERIODIC, "no periodic")      \
+  _(1, FAST_PERIODIC, "fast periodic")  \
+  _(2, SLOW_PERIODIC, "slow periodic")  \
+  _(3, PERIODIC_TX, "periodic transmission")
+
+typedef enum
+{
+#define _(a, b, c) LACP_PTX_STATE_##b = (a),
+  foreach_lacp_ptx_sm_state
+#undef _
+} lacp_ptx_sm_state_t;
+
+/* The PTX machine instance, defined in ptx_machine.c. */
+extern lacp_machine_t lacp_ptx_machine;
+
+/* PTX action routines. p1 is the vlib_main_t and p2 the slave_if_t, both
+   passed as void pointers. */
+int lacp_ptx_action_no_periodic (void *p1, void *p2);
+int lacp_ptx_action_slow_periodic (void *p1, void *p2);
+int lacp_ptx_action_fast_periodic (void *p1, void *p2);
+int lacp_ptx_action_timer_expired (void *p1, void *p2);
+/* Debug print hook stored in lacp_ptx_machine. */
+void lacp_ptx_debug_func (slave_if_t * sif, int event, int state,
+                         lacp_fsm_state_t * transition);
+
+/* Transition-table shorthands: each wraps one PTX action routine with
+   LACP_ACTION_ROUTINE (defined outside this header). */
+#define LACP_ACTION_NO_PERIODIC \
+  LACP_ACTION_ROUTINE(lacp_ptx_action_no_periodic)
+#define LACP_ACTION_SLOW_PERIODIC \
+  LACP_ACTION_ROUTINE(lacp_ptx_action_slow_periodic)
+#define LACP_ACTION_FAST_PERIODIC \
+  LACP_ACTION_ROUTINE(lacp_ptx_action_fast_periodic)
+#define LACP_ACTION_TIMER_EXPIRED \
+  LACP_ACTION_ROUTINE(lacp_ptx_action_timer_expired)
+
+/*
+ * Arm the periodic timer of sif: its deadline becomes the current vlib
+ * time plus expiration. Note that expiration is a u8, so callers' values
+ * are converted to that type on the way in.
+ */
+static inline void
+lacp_start_periodic_timer (vlib_main_t * vm, slave_if_t * sif, u8 expiration)
+{
+  sif->periodic_timer = expiration + vlib_time_now (vm);
+}
+
+#endif /* ___LACP_PTX_MACHINE_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/rx_machine.c b/src/plugins/lacp/rx_machine.c
new file mode 100644 (file)
index 0000000..374e3f8
--- /dev/null
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Receive (RX) machine transition tables, one per state.
+ * Each row is {action, next state}; rows are in event-number order
+ * (BEGIN, PORT_DISABLED, PORT_MOVED, LACP_ENABLED, LACP_DISABLED,
+ * PDU_RECEIVED, TIMER_EXPIRED), as noted in the trailing comment on each row.
+ */
+
+/*
+ *  LACP State = INITIALIZE
+ */
+static lacp_fsm_state_t lacp_rx_state_initialize[] = {
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED},       // event 0 BEGIN
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED},       // event 1 PORT_DISABLED
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_PORT_DISABLED},       // event 2 PORT_MOVED
+  {LACP_NOACTION, LACP_RX_STATE_INITIALIZE},   // event 3 LACP_ENABLED
+  {LACP_NOACTION, LACP_RX_STATE_INITIALIZE},   // event 4 LACP_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_INITIALIZE},   // event 5 PDU_RECEIVED
+  {LACP_NOACTION, LACP_RX_STATE_INITIALIZE},   // event 6 TIMER_EXPIRED
+};
+
+/*
+ *  LACP State = PORT_DISABLED
+ */
+static lacp_fsm_state_t lacp_rx_state_port_disabled[] = {
+  {LACP_ACTION_PORT_DISABLED, LACP_RX_STATE_PORT_DISABLED},    // event 0 BEGIN
+  {LACP_ACTION_PORT_DISABLED, LACP_RX_STATE_PORT_DISABLED},    // event 1 PORT_DISABLED
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE},  // event 2 PORT_MOVED
+  {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED},        // event 3 LACP_ENABLED
+  {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED},    // event 4 LACP_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_PORT_DISABLED},        // event 5 PDU_RECEIVED
+  {LACP_NOACTION, LACP_RX_STATE_PORT_DISABLED},        // event 6 TIMER_EXPIRED
+};
+
+/*
+ *  LACP State = EXPIRED
+ */
+static lacp_fsm_state_t lacp_rx_state_expired[] = {
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE},  // event 0 BEGIN
+  {LACP_NOACTION, LACP_RX_STATE_EXPIRED},      // event 1 PORT_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_EXPIRED},      // event 2 PORT_MOVED
+  {LACP_NOACTION, LACP_RX_STATE_EXPIRED},      // event 3 LACP_ENABLED
+  {LACP_NOACTION, LACP_RX_STATE_EXPIRED},      // event 4 LACP_DISABLED
+  {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT},        // event 5 PDU_RECEIVED
+  {LACP_ACTION_DEFAULTED, LACP_RX_STATE_DEFAULTED},    // event 6 TIMER_EXPIRED
+};
+
+/*
+ *  LACP State = LACP_DISABLED
+ */
+static lacp_fsm_state_t lacp_rx_state_lacp_disabled[] = {
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE},  // event 0 BEGIN
+  {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED},        // event 1 PORT_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED},        // event 2 PORT_MOVED
+  {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED},        // event 3 LACP_ENABLED XXX
+  {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED},    // event 4 LACP_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED},        // event 5 PDU_RECEIVED
+  {LACP_NOACTION, LACP_RX_STATE_LACP_DISABLED},        // event 6 TIMER_EXPIRED
+};
+
+/*
+ *  LACP State = DEFAULTED
+ */
+static lacp_fsm_state_t lacp_rx_state_defaulted[] = {
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE},  // event 0 BEGIN
+  {LACP_NOACTION, LACP_RX_STATE_DEFAULTED},    // event 1 PORT_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_DEFAULTED},    // event 2 PORT_MOVED
+  {LACP_NOACTION, LACP_RX_STATE_DEFAULTED},    // event 3 LACP_ENABLED
+  {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED},    // event 4 LACP_DISABLED
+  {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT},        // event 5 PDU_RECEIVED
+  {LACP_ACTION_DEFAULTED, LACP_RX_STATE_DEFAULTED},    // event 6 TIMER_EXPIRED
+};
+
+/*
+ *  LACP State = CURRENT
+ *  Rows are {action, next state} in RX event-number order.
+ */
+static lacp_fsm_state_t lacp_rx_state_current[] = {
+  {LACP_ACTION_INITIALIZE, LACP_RX_STATE_INITIALIZE},  // event 0 BEGIN
+  {LACP_NOACTION, LACP_RX_STATE_CURRENT},      // event 1 PORT_DISABLED
+  {LACP_NOACTION, LACP_RX_STATE_CURRENT},      // event 2 PORT_MOVED
+  {LACP_NOACTION, LACP_RX_STATE_CURRENT},      // event 3 LACP_ENABLED
+  {LACP_ACTION_LACP_DISABLED, LACP_RX_STATE_LACP_DISABLED},    // event 4 LACP_DISABLED
+  {LACP_ACTION_CURRENT, LACP_RX_STATE_CURRENT},        // event 5 PDU_RECEIVED
+  {LACP_ACTION_EXPIRED, LACP_RX_STATE_EXPIRED},        // event 6 TIMER_EXPIRED
+};
+
+/* Per-state tables listed in state order: INITIALIZE, PORT_DISABLED,
+   EXPIRED, LACP_DISABLED, DEFAULTED, CURRENT. */
+static lacp_fsm_machine_t lacp_rx_fsm_table[] = {
+  {lacp_rx_state_initialize},
+  {lacp_rx_state_port_disabled},
+  {lacp_rx_state_expired},
+  {lacp_rx_state_lacp_disabled},
+  {lacp_rx_state_defaulted},
+  {lacp_rx_state_current},
+};
+
+/* The RX machine handed to lacp_machine_dispatch(): its state tables plus
+   the debug print routine. */
+lacp_machine_t lacp_rx_machine = {
+  lacp_rx_fsm_table,
+  lacp_rx_debug_func,
+};
+
+/*
+ * Mark sif as UNSELECTED and notify the MUX machine, except when the MUX
+ * machine is ATTACHED, or is COLLECTING_DISTRIBUTING while the partner's
+ * synchronization bit is set; in those cases no event is dispatched.
+ */
+static void
+lacp_set_port_unselected (vlib_main_t * vm, slave_if_t * sif)
+{
+  sif->selected = LACP_PORT_UNSELECTED;
+
+  if (sif->mux_state == LACP_MUX_STATE_ATTACHED)
+    return;
+
+  if (sif->mux_state == LACP_MUX_STATE_COLLECTING_DISTRIBUTING &&
+      (sif->partner.state & LACP_STATE_SYNCHRONIZATION))
+    return;
+
+  lacp_machine_dispatch (&lacp_mux_machine, vm, sif,
+                         LACP_MUX_EVENT_UNSELECTED, &sif->mux_state);
+}
+
+/*
+ * Unselect the port when the administrative partner values differ from
+ * the current partner values: either the aggregation bit differs, or any
+ * byte of the partner record other than the trailing state field differs.
+ */
+static void
+lacp_update_default_selected (vlib_main_t * vm, slave_if_t * sif)
+{
+  u8 same_aggregation =
+    (sif->partner_admin.state & LACP_STATE_AGGREGATION) ==
+    (sif->partner.state & LACP_STATE_AGGREGATION);
+
+  if (same_aggregation &&
+      !memcmp (&sif->partner, &sif->partner_admin,
+               sizeof (sif->partner) - sizeof (sif->partner.state)))
+    return;
+
+  lacp_set_port_unselected (vm, sif);
+}
+
+/* Adopt the administrative partner values as the current partner and set
+   the DEFAULTED bit in the actor state. */
+static void
+lacp_record_default (slave_if_t * sif)
+{
+  sif->actor.state |= LACP_STATE_DEFAULTED;
+  sif->partner = sif->partner_admin;
+}
+
+/*
+ * Unselect the port when the actor information in the last received PDU
+ * differs from the recorded partner: either the aggregation bit differs,
+ * or any byte of the record other than the trailing state field differs.
+ */
+static void
+lacp_update_selected (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+  u8 same_aggregation =
+    (lacpdu->actor.port_info.state & LACP_STATE_AGGREGATION) ==
+    (sif->partner.state & LACP_STATE_AGGREGATION);
+
+  if (same_aggregation &&
+      !memcmp (&sif->partner, &lacpdu->actor.port_info,
+               sizeof (sif->partner) - sizeof (sif->partner.state)))
+    return;
+
+  lacp_set_port_unselected (vm, sif);
+}
+
+/*
+ * Request a transmission (ntt plus an NTT event to the TX machine) when
+ * the partner information in the last received PDU disagrees with our
+ * actor record: either one of the activity, timeout, synchronization or
+ * aggregation bits differs, or any byte other than the trailing state
+ * field differs.
+ */
+static void
+lacp_update_ntt (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+  u8 mask = LACP_STATE_LACP_ACTIVITY | LACP_STATE_LACP_TIMEOUT |
+    LACP_STATE_SYNCHRONIZATION | LACP_STATE_AGGREGATION;
+  u8 reported = mask & lacpdu->partner.port_info.state;
+  u8 actual = mask & sif->actor.state;
+
+  if (reported == actual &&
+      !memcmp (&sif->actor, &lacpdu->partner.port_info,
+               sizeof (sif->actor) - sizeof (sif->actor.state)))
+    return;
+
+  sif->ntt = 1;
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                         &sif->tx_state);
+}
+
+/*
+ * Decide whether the last received LACPDU "matches", for lacp_record_pdu().
+ * Returns 1 when either
+ *  - the PDU's actor info equals sif->partner in every byte except the
+ *    trailing state field, and the aggregation bit of sif->actor.state
+ *    equals the aggregation bit the PDU reports for its partner, or
+ *  - the PDU's actor state has the aggregation bit clear.
+ * Returns 0 otherwise.
+ *
+ * NOTE(review): the memcmp length assumes `state` is the last member of the
+ * port-info structure - confirm against its definition.
+ * NOTE(review): this compares the PDU's actor info with our recorded
+ * partner; the recordPDU function in IEEE 802.1AX compares the PDU's
+ * partner info with our actor values - confirm this is intended.
+ */
+static u8
+lacp_compare_partner (slave_if_t * sif)
+{
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+
+  if ((!memcmp (&sif->partner, &lacpdu->actor.port_info,
+               sizeof (sif->partner) - sizeof (sif->partner.state)) &&
+       ((sif->actor.state & LACP_STATE_AGGREGATION) ==
+       (lacpdu->partner.port_info.state & LACP_STATE_AGGREGATION))) ||
+      ((lacpdu->actor.port_info.state & LACP_STATE_AGGREGATION) == 0))
+    return 1;
+
+  return 0;
+}
+
+/*
+ * Record the actor information of the last received PDU as our partner
+ * and clear the actor's DEFAULTED bit. The partner's synchronization bit
+ * is kept only when lacp_compare_partner() reported a match (evaluated
+ * against the previous partner record) and the PDU itself has it set.
+ */
+static void
+lacp_record_pdu (slave_if_t * sif)
+{
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+  /* Must run before sif->partner is overwritten below. */
+  u8 match = lacp_compare_partner (sif);
+
+  sif->actor.state &= ~LACP_STATE_DEFAULTED;
+  sif->partner = lacpdu->actor.port_info;
+
+  if (!match ||
+      !(lacpdu->actor.port_info.state & LACP_STATE_SYNCHRONIZATION))
+    sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION;
+  else
+    sif->partner.state |= LACP_STATE_SYNCHRONIZATION;
+}
+
+/*
+ * Store val in sif->port_moved and notify the RX machine: PORT_MOVED when
+ * the flag is now set, otherwise PORT_DISABLED if the port is not enabled.
+ */
+static void
+lacp_set_port_moved (vlib_main_t * vm, slave_if_t * sif, u8 val)
+{
+  sif->port_moved = val;
+
+  if (sif->port_moved)
+    {
+      lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                             LACP_RX_EVENT_PORT_MOVED, &sif->rx_state);
+      return;
+    }
+
+  if (sif->port_enabled)
+    return;
+
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_PORT_DISABLED, &sif->rx_state);
+}
+
+/*
+ * RX action for INITIALIZE: unselect the port, fall back to the
+ * administrative partner values, clear the actor's EXPIRED bit, reset the
+ * port_moved flag and then dispatch BEGIN to the RX machine again.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_initialize (void *p1, void *p2)
+{
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+
+  lacp_set_port_unselected (vm, sif);
+  lacp_record_default (sif);
+  sif->actor.state &= ~LACP_STATE_EXPIRED;
+  lacp_set_port_moved (vm, sif, 0);
+
+  /* UCT */
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif, LACP_RX_EVENT_BEGIN,
+                         &sif->rx_state);
+  return 0;
+}
+
+/*
+ * RX action for PORT_DISABLED: clear the partner's synchronization bit,
+ * dispatch PORT_MOVED if the port is flagged as moved, and, when the port
+ * is enabled, dispatch LACP_ENABLED or LACP_DISABLED according to
+ * sif->lacp_enabled.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_port_disabled (void *p1, void *p2)
+{
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+  int event;
+
+  sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION;
+
+  if (sif->port_moved)
+    lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                           LACP_RX_EVENT_PORT_MOVED, &sif->rx_state);
+
+  /* The flags are read only after the dispatch above has run. */
+  if (!sif->port_enabled)
+    return 0;
+
+  event = sif->lacp_enabled ?
+    LACP_RX_EVENT_LACP_ENABLED : LACP_RX_EVENT_LACP_DISABLED;
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif, event, &sif->rx_state);
+
+  return 0;
+}
+
+/*
+ * RX action for EXPIRED: mark the partner as out of sync and using the
+ * short timeout, tell the PTX machine (SHORT_TIMEOUT), restart the
+ * current-while timer with LACP_SHORT_TIMOUT_TIME and set the actor's
+ * EXPIRED bit. Afterwards dispatch TIMER_EXPIRED if the old timer had
+ * already run out, and PDU_RECEIVED if a received packet is stored.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_expired (void *p1, void *p2)
+{
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+  u8 timer_expired;
+
+  sif->partner.state &= ~LACP_STATE_SYNCHRONIZATION;
+  sif->partner.state |= LACP_STATE_LACP_TIMEOUT;
+  lacp_machine_dispatch (&lacp_ptx_machine, vm, sif,
+                         LACP_PTX_EVENT_SHORT_TIMEOUT, &sif->ptx_state);
+
+  /* Sample the old deadline before the timer is restarted. */
+  timer_expired =
+    (lacp_timer_is_running (sif->current_while_timer) &&
+     lacp_timer_is_expired (lm->vlib_main, sif->current_while_timer)) ? 1 : 0;
+  lacp_start_current_while_timer (lm->vlib_main, sif, LACP_SHORT_TIMOUT_TIME);
+  sif->actor.state |= LACP_STATE_EXPIRED;
+
+  if (timer_expired)
+    {
+      lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                             LACP_RX_EVENT_TIMER_EXPIRED, &sif->rx_state);
+    }
+  if (sif->last_rx_pkt && vec_len (sif->last_rx_pkt))
+    {
+      lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                             LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state);
+    }
+
+  return 0;
+}
+
+/*
+ * RX action for LACP_DISABLED: unselect the port, fall back to the
+ * administrative partner values, then clear the partner's aggregation bit
+ * and the actor's EXPIRED bit.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_lacp_disabled (void *p1, void *p2)
+{
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+
+  lacp_set_port_unselected (vm, sif);
+  lacp_record_default (sif);
+
+  sif->actor.state &= ~LACP_STATE_EXPIRED;
+  sif->partner.state &= ~LACP_STATE_AGGREGATION;
+
+  return 0;
+}
+
+/*
+ * RX action for DEFAULTED: re-evaluate selection against the
+ * administrative partner values, adopt those values, clear the actor's
+ * EXPIRED bit and, if a received packet is stored, dispatch PDU_RECEIVED.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_defaulted (void *p1, void *p2)
+{
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+
+  lacp_update_default_selected (vm, sif);
+  lacp_record_default (sif);
+  sif->actor.state &= ~LACP_STATE_EXPIRED;
+
+  if (!sif->last_rx_pkt || !vec_len (sif->last_rx_pkt))
+    return 0;
+
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_PDU_RECEIVED, &sif->rx_state);
+  return 0;
+}
+
+/*
+ * Return 1 if some other entry of bond_main.neighbors is in the RX state
+ * PORT_DISABLED and its recorded partner system and port number equal the
+ * partner system and port number carried in sif's last received PDU;
+ * return 0 otherwise. vm is not used.
+ */
+static int
+lacp_port_is_moved (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_main_t *bm = &bond_main;
+  slave_if_t *sif2;
+  lacp_pdu_t *lacpdu = (lacp_pdu_t *) sif->last_rx_pkt;
+
+  /* *INDENT-OFF* */
+  pool_foreach (sif2, bm->neighbors, {
+      {
+       /* Note: the return below leaves the function from inside the
+          pool_foreach body. */
+       if ((sif != sif2) && (sif2->rx_state == LACP_RX_STATE_PORT_DISABLED) &&
+           !memcmp (sif2->partner.system,
+                    lacpdu->partner.port_info.system, 6) &&
+           (sif2->partner.port_number == lacpdu->partner.port_info.port_number))
+         return 1;
+      }
+  });
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+/*
+ * RX action for CURRENT, run for a received PDU: update selection and the
+ * need-to-transmit flag from the PDU, record the PDU as the new partner,
+ * restart the current-while timer with sif->ttl_in_seconds, clear the
+ * actor's EXPIRED bit, flag the port as moved if lacp_port_is_moved() says
+ * so, and finally run the selection logic.
+ * p1 is the vlib_main_t, p2 the slave_if_t. Always returns 0.
+ */
+int
+lacp_rx_action_current (void *p1, void *p2)
+{
+  lacp_main_t *lm = &lacp_main;
+  slave_if_t *sif = p2;
+  vlib_main_t *vm = p1;
+
+  /* Both checks compare against the old records, so they run before the
+     PDU is recorded. */
+  lacp_update_selected (vm, sif);
+  lacp_update_ntt (vm, sif);
+  lacp_record_pdu (sif);
+
+  lacp_start_current_while_timer (lm->vlib_main, sif, sif->ttl_in_seconds);
+  sif->actor.state &= ~LACP_STATE_EXPIRED;
+
+  if (lacp_port_is_moved (vm, sif))
+    {
+      lacp_set_port_moved (vm, sif, 1);
+    }
+  lacp_selection_logic (vm, sif);
+
+  return 0;
+}
+
+/*
+ * format() callback: append the name of an RX machine event to s.
+ *
+ * Consumes one int from args (the event number) and returns the extended
+ * vector. The text printed is the stringified event identifier (second
+ * column of foreach_lacp_rx_event). Numbers that do not index a real
+ * table entry are rendered as "Bad event <n>".
+ */
+static u8 *
+format_rx_event (u8 * s, va_list * args)
+{
+  static const lacp_event_struct lacp_rx_event_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_rx_event
+#undef _
+    {.str = NULL}
+  };
+  /* The trailing {.str = NULL} sentinel is not a printable event. */
+  const int n_events =
+    (int) (sizeof (lacp_rx_event_array) /
+           sizeof (lacp_rx_event_array[0])) - 1;
+  int e = va_arg (*args, int);
+
+  if (e < 0 || e >= n_events)
+    s = format (s, "Bad event %d", e);
+  else
+    s = format (s, "%s", lacp_rx_event_array[e].str);
+
+  return s;
+}
+
+/*
+ * Debug hook of the RX machine: log the interface name, the event, the
+ * state the machine was in and the state the transition leads to.
+ */
+void
+lacp_rx_debug_func (slave_if_t * sif, int event, int state,
+                   lacp_fsm_state_t * transition)
+{
+  clib_warning ("%U-RX: event %U, old state %U, new state %U",
+                format_vnet_sw_if_index_name, vnet_get_main (),
+                sif->sw_if_index,
+                format_rx_event, event,
+                format_rx_sm_state, state,
+                format_rx_sm_state, transition->next_state);
+}
+
+/* Start the RX machine for sif: dispatch BEGIN, then LACP_ENABLED. */
+void
+lacp_init_rx_machine (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_BEGIN, &sif->rx_state);
+  lacp_machine_dispatch (&lacp_rx_machine, vm, sif,
+                         LACP_RX_EVENT_LACP_ENABLED, &sif->rx_state);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/rx_machine.h b/src/plugins/lacp/rx_machine.h
new file mode 100644 (file)
index 0000000..706dbd0
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef        __LACP_RX_MACHINE_H__
+#define        __LACP_RX_MACHINE_H__
+
+#include <stdint.h>
+#include <lacp/machine.h>
+
+/*
+ * RX machine events, written as _(number, name, description). Expanded
+ * below into the LACP_RX_EVENT_<name> enumerators.
+ */
+#define foreach_lacp_rx_event          \
+  _(0, BEGIN, "begin")                 \
+  _(1, PORT_DISABLED, "port disabled") \
+  _(2, PORT_MOVED, "port moved")       \
+  _(3, LACP_ENABLED, "lacp enabled")   \
+  _(4, LACP_DISABLED, "lacp disabled") \
+  _(5, PDU_RECEIVED, "pdu received")   \
+  _(6, TIMER_EXPIRED, "timer expired")
+
+typedef enum
+{
+#define _(a, b, c) LACP_RX_EVENT_##b = (a),
+  foreach_lacp_rx_event
+#undef _
+} lacp_rx_event_t;
+
+/*
+ * RX machine states, written as _(number, name, description). Expanded
+ * below into the LACP_RX_STATE_<name> enumerators.
+ */
+#define foreach_lacp_rx_sm_state       \
+  _(0, INITIALIZE, "initialize")       \
+  _(1, PORT_DISABLED, "port disabled") \
+  _(2, EXPIRED, "expired")             \
+  _(3, LACP_DISABLED, "lacp disabled") \
+  _(4, DEFAULTED, "defaulted")         \
+  _(5, CURRENT, "current")
+
+typedef enum
+{
+#define _(a, b, c) LACP_RX_STATE_##b = (a),
+  foreach_lacp_rx_sm_state
+#undef _
+} lacp_rx_sm_state_t;
+
+/* The RX machine instance, defined in rx_machine.c. */
+extern lacp_machine_t lacp_rx_machine;
+
+/* RX action routines. The first argument is the vlib_main_t and the second
+   the slave_if_t, both passed as void pointers. */
+int lacp_rx_action_initialize (void *, void *);
+int lacp_rx_action_port_disabled (void *, void *);
+/* NOTE(review): no action macro in this header refers to
+   lacp_rx_action_pdu_received; confirm a definition exists or drop it. */
+int lacp_rx_action_pdu_received (void *, void *);
+int lacp_rx_action_expired (void *, void *);
+int lacp_rx_action_lacp_disabled (void *, void *);
+int lacp_rx_action_defaulted (void *, void *);
+int lacp_rx_action_current (void *, void *);
+/* Debug print hook stored in lacp_rx_machine. */
+void lacp_rx_debug_func (slave_if_t * sif, int event, int state,
+                        lacp_fsm_state_t * transition);
+
+/* Transition-table shorthands: each wraps one RX action routine with
+   LACP_ACTION_ROUTINE (defined outside this header). */
+#define LACP_ACTION_INITIALIZE \
+  LACP_ACTION_ROUTINE(lacp_rx_action_initialize)
+#define LACP_ACTION_PORT_DISABLED \
+  LACP_ACTION_ROUTINE(lacp_rx_action_port_disabled)
+#define LACP_ACTION_EXPIRED \
+  LACP_ACTION_ROUTINE(lacp_rx_action_expired)
+#define LACP_ACTION_LACP_DISABLED \
+  LACP_ACTION_ROUTINE(lacp_rx_action_lacp_disabled)
+#define LACP_ACTION_DEFAULTED LACP_ACTION_ROUTINE(lacp_rx_action_defaulted)
+#define LACP_ACTION_CURRENT LACP_ACTION_ROUTINE(lacp_rx_action_current)
+
+/*
+ * Arm the current-while timer of sif: its deadline becomes the current
+ * vlib time plus expiration. Note that expiration is a u8, so callers'
+ * values are converted to that type on the way in.
+ */
+static inline void
+lacp_start_current_while_timer (vlib_main_t * vm, slave_if_t * sif,
+                               u8 expiration)
+{
+  sif->current_while_timer = expiration + vlib_time_now (vm);
+}
+
+#endif /* __LACP_RX_MACHINE_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/selection.c b/src/plugins/lacp/selection.c
new file mode 100644 (file)
index 0000000..898b6a9
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ * Mark a slave as SELECTED and hand the SELECTED event to the mux state
+ * machine, unless the slave's current mux state is not yet eligible:
+ *   - WAITING  requires sif->ready to be set
+ *   - ATTACHED requires the partner's SYNCHRONIZATION state bit
+ * Every other mux state dispatches unconditionally.
+ */
+static void
+lacp_set_port_selected (vlib_main_t * vm, slave_if_t * sif)
+{
+  int same_system =
+    (memcmp (sif->partner.system, sif->actor.system, 6) == 0);
+
+  /* Partner reports our own system id and key: treat as loopback port */
+  if (same_system && (sif->actor.key == sif->partner.key))
+    {
+      sif->loopback_port = 1;
+      sif->actor.state &= ~LACP_STATE_AGGREGATION;
+    }
+  sif->selected = LACP_PORT_SELECTED;
+
+  if ((sif->mux_state == LACP_MUX_STATE_WAITING) && !sif->ready)
+    return;
+
+  if ((sif->mux_state == LACP_MUX_STATE_ATTACHED) &&
+      !(sif->partner.state & LACP_STATE_SYNCHRONIZATION))
+    return;
+
+  lacp_machine_dispatch (&lacp_mux_machine, vm, sif, LACP_MUX_EVENT_SELECTED,
+                        &sif->mux_state);
+}
+
+/*
+ * Run the selection logic for `sif`.
+ *
+ * First scan all slaves of the bond `sif` belongs to.  If any of them has
+ * the actor SYNCHRONIZATION bit set while its ready_n is still 0, the
+ * READY pass is skipped.  Otherwise every slave is flagged ready and the
+ * ones already SELECTED get a READY event on their mux machine.
+ * In both cases `sif` itself is then marked selected.
+ */
+void
+lacp_selection_logic (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_if_t *bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+  slave_if_t *member;
+  u32 *sw_if_index;
+  int hold_ready = 0;
+
+  vec_foreach (sw_if_index, bif->slaves)
+  {
+    member = bond_get_slave_by_sw_if_index (*sw_if_index);
+    if (member && (member->actor.state & LACP_STATE_SYNCHRONIZATION) &&
+       (member->ready_n == 0))
+      {
+       hold_ready = 1;
+       break;
+      }
+  }
+
+  if (!hold_ready)
+    {
+      vec_foreach (sw_if_index, bif->slaves)
+      {
+       member = bond_get_slave_by_sw_if_index (*sw_if_index);
+       if (!member)
+         continue;
+
+       member->ready = 1;
+       if (member->selected == LACP_PORT_SELECTED)
+         lacp_machine_dispatch (&lacp_mux_machine, vm, member,
+                                LACP_MUX_EVENT_READY, &member->mux_state);
+      }
+    }
+
+  lacp_set_port_selected (vm, sif);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/tx_machine.c b/src/plugins/lacp/tx_machine.c
new file mode 100644 (file)
index 0000000..794b4f1
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <vnet/bonding/node.h>
+#include <lacp/node.h>
+
+/*
+ *  LACP State = TRANSMIT
+ */
+/*
+ * Transition row for the TRANSMIT state, indexed by TX event number.
+ * Both events run the transmit action and stay in TRANSMIT.
+ */
+static lacp_fsm_state_t lacp_tx_state_transmit[] = {
+  {LACP_ACTION_TRANSMIT, LACP_TX_STATE_TRANSMIT},      // event 0 BEGIN
+  {LACP_ACTION_TRANSMIT, LACP_TX_STATE_TRANSMIT},      // event 1 NTT
+};
+
+/* Per-state transition rows; the TX machine has a single state. */
+static lacp_fsm_machine_t lacp_tx_fsm_table[] = {
+  {lacp_tx_state_transmit},
+};
+
+/* TX machine descriptor: the transition table plus its debug callback. */
+lacp_machine_t lacp_tx_machine = {
+  lacp_tx_fsm_table,
+  lacp_tx_debug_func,
+};
+
+/*
+ * TX machine action: send an LACPDU for the slave if one is pending.
+ *
+ * p1 is the vlib_main_t, p2 the slave_if_t.  Nothing happens while the
+ * periodic timer is not running or while the previous LACPDU is at most
+ * 0.333 time units old.  Otherwise a PDU is sent when sif->ntt is set
+ * and ntt is cleared.  Always returns 0.
+ */
+int
+lacp_tx_action_transmit (void *p1, void *p2)
+{
+  vlib_main_t *vm = p1;
+  slave_if_t *sif = p2;
+  f64 now = vlib_time_now (vm);
+
+  // No more than 3 LACPDUs per fast interval
+  if (lacp_timer_is_running (sif->periodic_timer) &&
+      !(now <= (sif->last_lacpdu_time + 0.333)))
+    {
+      if (sif->ntt)
+       lacp_send_lacp_pdu (vm, sif);
+      sif->ntt = 0;
+    }
+
+  return 0;
+}
+
+/*
+ * format() helper: append the name of a TX machine event to `s`.
+ *
+ * Takes one int argument, the event number.  Known events are printed
+ * as their stringified enum suffix ("BEGIN", "NTT"); anything else is
+ * printed as "Bad event <n>".  Returns the (possibly reallocated) vector.
+ */
+static u8 *
+format_tx_event (u8 * s, va_list * args)
+{
+  static lacp_event_struct lacp_tx_event_array[] = {
+#define _(b, s, n) {.bit = b, .str = #s, },
+    foreach_lacp_tx_event
+#undef _
+    {.str = NULL}
+  };
+  int e = va_arg (*args, int);
+  /*
+   * Number of real events.  The trailing {.str = NULL} sentinel must not
+   * count as a valid index, otherwise a NULL string would be formatted.
+   */
+  int n_events = (int) (sizeof (lacp_tx_event_array) /
+                       sizeof (lacp_tx_event_array[0])) - 1;
+
+  if ((e < 0) || (e >= n_events))
+    s = format (s, "Bad event %d", e);
+  else
+    s = format (s, "%s", lacp_tx_event_array[e].str);
+
+  return s;
+}
+
+/*
+ * Debug callback of the TX machine: log the interface name, the event,
+ * the state before the transition and the state it leads to.
+ */
+void
+lacp_tx_debug_func (slave_if_t * sif, int event, int state,
+                   lacp_fsm_state_t * transition)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  clib_warning ("%U-TX: event %U, old state %U, new state %U",
+               format_vnet_sw_if_index_name, vnm, sif->sw_if_index,
+               format_tx_event, event,
+               format_tx_sm_state, state,
+               format_tx_sm_state, transition->next_state);
+}
+
+/*
+ * Start the TX machine of a slave: always dispatch BEGIN, and follow it
+ * with NTT unless the slave is configured as passive.
+ */
+void
+lacp_init_tx_machine (vlib_main_t * vm, slave_if_t * sif)
+{
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_BEGIN,
+                        &sif->tx_state);
+
+  if (sif->is_passive)
+    return;
+
+  lacp_machine_dispatch (&lacp_tx_machine, vm, sif, LACP_TX_EVENT_NTT,
+                        &sif->tx_state);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lacp/tx_machine.h b/src/plugins/lacp/tx_machine.h
new file mode 100644 (file)
index 0000000..428c19b
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef        __LACP_TX_MACHINE_H__
+#define        __LACP_TX_MACHINE_H__
+
+#include <stdint.h>
+#include <lacp/machine.h>
+
+/*
+ * X-macro table of TX machine events:
+ * (numeric value, enum suffix, human readable name).
+ */
+#define foreach_lacp_tx_event          \
+  _(0, BEGIN, "begin")                 \
+  _(1, NTT, "Need To Transmit")
+
+/* Expands every event entry to LACP_TX_EVENT_<suffix> = <value>. */
+typedef enum
+{
+#define _(a, b, c) LACP_TX_EVENT_##b = (a),
+  foreach_lacp_tx_event
+#undef _
+} lacp_tx_event_t;
+
+/* X-macro table of TX machine states; TRANSMIT is the only one. */
+#define foreach_lacp_tx_sm_state       \
+  _(0, TRANSMIT, "transmit PDU")
+
+/* Expands every state entry to LACP_TX_STATE_<suffix> = <value>. */
+typedef enum
+{
+#define _(a, b, c) LACP_TX_STATE_##b = (a),
+  foreach_lacp_tx_sm_state
+#undef _
+} lacp_tx_sm_state_t;
+
+/* TX state machine descriptor, defined in tx_machine.c. */
+extern lacp_machine_t lacp_tx_machine;
+
+/* TX action routine (p1: vlib_main_t *, p2: slave_if_t *) and debug hook. */
+int lacp_tx_action_transmit (void *p1, void *p2);
+void lacp_tx_debug_func (slave_if_t * sif, int event, int state,
+                        lacp_fsm_state_t * transition);
+
+/* Transition-table entry form of the transmit action. */
+#define LACP_ACTION_TRANSMIT LACP_ACTION_ROUTINE(lacp_tx_action_transmit)
+
+#endif /* __LACP_TX_MACHINE_H__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index 97e67f9..019d095 100644 (file)
@@ -50,7 +50,7 @@
 #include <vnet/policer/police.h>
 #include <vnet/mfib/mfib_types.h>
 #include <vnet/dhcp/dhcp_proxy.h>
-
+#include <vnet/bonding/node.h>
 #include "vat/json_format.h"
 
 #include <inttypes.h>
@@ -1775,6 +1775,275 @@ static void vl_api_tap_delete_v2_reply_t_handler_json
   vam->result_ready = 1;
 }
 
+/*
+ * VAT handler for bond_create_reply.  In async mode only failures are
+ * counted; otherwise the return value and the new sw_if_index are stored
+ * in vat_main and the result is flagged ready.
+ */
+static void
+vl_api_bond_create_reply_t_handler (vl_api_bond_create_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+       vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->sw_if_index = ntohl (mp->sw_if_index);
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON flavour of the bond_create_reply handler: print retval and
+ * sw_if_index as a JSON object, then record the result in vat_main.
+ */
+static void vl_api_bond_create_reply_t_handler_json
+  (vl_api_bond_create_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  /* Build the object */
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+  vat_json_object_add_uint (&reply, "sw_if_index", ntohl (mp->sw_if_index));
+
+  /* Emit it and release it */
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
+/*
+ * VAT handler for bond_delete_reply.  In async mode only failures are
+ * counted; otherwise the return value is stored and the result is
+ * flagged ready.
+ */
+static void
+vl_api_bond_delete_reply_t_handler (vl_api_bond_delete_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+       vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON flavour of the bond_delete_reply handler: print retval as a JSON
+ * object, then record the result in vat_main.
+ */
+static void vl_api_bond_delete_reply_t_handler_json
+  (vl_api_bond_delete_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  /* Build the object */
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+
+  /* Emit it and release it */
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
+/*
+ * VAT handler for bond_enslave_reply.  In async mode only failures are
+ * counted; otherwise the return value is stored and the result is
+ * flagged ready.
+ */
+static void
+vl_api_bond_enslave_reply_t_handler (vl_api_bond_enslave_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+       vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON flavour of the bond_enslave_reply handler: print retval as a JSON
+ * object, then record the result in vat_main.
+ */
+static void vl_api_bond_enslave_reply_t_handler_json
+  (vl_api_bond_enslave_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  /* Build the object */
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+
+  /* Emit it and release it */
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
+/*
+ * VAT handler for bond_detach_slave_reply.  In async mode only failures
+ * are counted; otherwise the return value is stored and the result is
+ * flagged ready.
+ */
+static void
+vl_api_bond_detach_slave_reply_t_handler (vl_api_bond_detach_slave_reply_t *
+                                         mp)
+{
+  vat_main_t *vam = &vat_main;
+  i32 rv = ntohl (mp->retval);
+
+  if (vam->async_mode)
+    {
+      if (rv < 0)
+       vam->async_errors++;
+      return;
+    }
+
+  vam->retval = rv;
+  vam->result_ready = 1;
+}
+
+/*
+ * JSON flavour of the bond_detach_slave_reply handler: print retval as a
+ * JSON object, then record the result in vat_main.
+ */
+static void vl_api_bond_detach_slave_reply_t_handler_json
+  (vl_api_bond_detach_slave_reply_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t reply;
+
+  /* Build the object */
+  vat_json_init_object (&reply);
+  vat_json_object_add_int (&reply, "retval", ntohl (mp->retval));
+
+  /* Emit it and release it */
+  vat_json_print (vam->ofp, &reply);
+  vat_json_free (&reply);
+
+  vam->retval = ntohl (mp->retval);
+  vam->result_ready = 1;
+}
+
+/*
+ * Print one bond interface as a table row: name, sw_if_index, mode,
+ * load balance, active slave count and configured slave count.
+ */
+static void vl_api_sw_interface_bond_details_t_handler
+  (vl_api_sw_interface_bond_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  u32 n_active = ntohl (mp->active_slaves);
+  u32 n_slaves = ntohl (mp->slaves);
+
+  print (vam->ofp,
+        "%-16s %-12d %-12U %-13U %-14u %-14u",
+        mp->interface_name, sw_if_index,
+        format_bond_mode, mp->mode,
+        format_bond_load_balance, mp->lb, n_active, n_slaves);
+}
+
+/*
+ * Append one bond interface to the JSON result array kept in
+ * vam->json_tree, creating the array on the first call.
+ */
+static void vl_api_sw_interface_bond_details_t_handler_json
+  (vl_api_sw_interface_bond_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t *entry;
+
+  /* Lazily turn the (still empty) tree into an array */
+  if (vam->json_tree.type != VAT_JSON_ARRAY)
+    {
+      ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+      vat_json_init_array (&vam->json_tree);
+    }
+
+  entry = vat_json_array_add (&vam->json_tree);
+  vat_json_init_object (entry);
+
+  vat_json_object_add_uint (entry, "sw_if_index", ntohl (mp->sw_if_index));
+  vat_json_object_add_string_copy (entry, "interface_name",
+                                  mp->interface_name);
+  vat_json_object_add_uint (entry, "mode", mp->mode);
+  vat_json_object_add_uint (entry, "load_balance", mp->lb);
+  vat_json_object_add_uint (entry, "active_slaves",
+                           ntohl (mp->active_slaves));
+  vat_json_object_add_uint (entry, "slaves", ntohl (mp->slaves));
+}
+
+/*
+ * VAT command: dump all bond interfaces.  Prints the table header, sends
+ * SW_INTERFACE_BOND_DUMP followed by a control ping, and returns the
+ * value produced by the W() wait macro.
+ */
+static int
+api_sw_interface_bond_dump (vat_main_t * vam)
+{
+  vl_api_sw_interface_bond_dump_t *mp;
+  vl_api_control_ping_t *mp_ping;
+  int ret;
+
+  /* Column widths match the row format of the details handler */
+  print (vam->ofp,
+        "\n%-16s %-12s %-12s %-13s %-14s %-14s",
+        "interface name", "sw_if_index", "mode", "load balance",
+        "active slaves", "slaves");
+
+  /* Get list of bond interfaces */
+  M (SW_INTERFACE_BOND_DUMP, mp);
+  S (mp);
+
+  /* Use a control ping for synchronization */
+  MPING (CONTROL_PING, mp_ping);
+  S (mp_ping);
+
+  W (ret);
+  return ret;
+}
+
+/*
+ * Print one slave interface as a table row: name, sw_if_index, passive
+ * flag and long-timeout flag.
+ */
+static void vl_api_sw_interface_slave_details_t_handler
+  (vl_api_sw_interface_slave_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+
+  print (vam->ofp,
+        "%-25s %-12d %-12d %d",
+        mp->interface_name, sw_if_index,
+        mp->is_passive, mp->is_long_timeout);
+}
+
+/*
+ * Append one slave interface to the JSON result array kept in
+ * vam->json_tree, creating the array on the first call.
+ */
+static void vl_api_sw_interface_slave_details_t_handler_json
+  (vl_api_sw_interface_slave_details_t * mp)
+{
+  vat_main_t *vam = &vat_main;
+  vat_json_node_t *entry;
+
+  /* Lazily turn the (still empty) tree into an array */
+  if (vam->json_tree.type != VAT_JSON_ARRAY)
+    {
+      ASSERT (VAT_JSON_NONE == vam->json_tree.type);
+      vat_json_init_array (&vam->json_tree);
+    }
+
+  entry = vat_json_array_add (&vam->json_tree);
+  vat_json_init_object (entry);
+
+  vat_json_object_add_uint (entry, "sw_if_index", ntohl (mp->sw_if_index));
+  vat_json_object_add_string_copy (entry, "interface_name",
+                                  mp->interface_name);
+  vat_json_object_add_uint (entry, "passive", mp->is_passive);
+  vat_json_object_add_uint (entry, "long_timeout", mp->is_long_timeout);
+}
+
+/*
+ * VAT command: dump the slaves of one interface.
+ *
+ * The interface is given either by name or as "sw_if_index <id>".
+ * Returns -99 when no interface was supplied, otherwise the value
+ * produced by the W() wait macro.
+ */
+static int
+api_sw_interface_slave_dump (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_sw_interface_slave_dump_t *mp;
+  vl_api_control_ping_t *mp_ping;
+  u32 sw_if_index = ~0;
+  u8 sw_if_index_set = 0;
+  int ret;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)
+         || unformat (i, "sw_if_index %d", &sw_if_index))
+       sw_if_index_set = 1;
+      else
+       break;
+    }
+
+  if (!sw_if_index_set)
+    {
+      errmsg ("missing vpp interface name. ");
+      return -99;
+    }
+
+  /* Table header; widths match the row format of the details handler */
+  print (vam->ofp,
+        "\n%-25s %-12s %-12s %s",
+        "slave interface name", "sw_if_index", "passive", "long_timeout");
+
+  /* Request the slave list for the given interface */
+  M (SW_INTERFACE_SLAVE_DUMP, mp);
+  mp->sw_if_index = ntohl (sw_if_index);
+  S (mp);
+
+  /* Use a control ping for synchronization */
+  MPING (CONTROL_PING, mp_ping);
+  S (mp_ping);
+
+  W (ret);
+  return ret;
+}
+
 static void vl_api_mpls_tunnel_add_del_reply_t_handler
   (vl_api_mpls_tunnel_add_del_reply_t * mp)
 {
@@ -5466,6 +5735,12 @@ _(SW_INTERFACE_TAP_DETAILS, sw_interface_tap_details)                   \
 _(TAP_CREATE_V2_REPLY, tap_create_v2_reply)                            \
 _(TAP_DELETE_V2_REPLY, tap_delete_v2_reply)                            \
 _(SW_INTERFACE_TAP_V2_DETAILS, sw_interface_tap_v2_details)             \
+_(BOND_CREATE_REPLY, bond_create_reply)                                        \
+_(BOND_DELETE_REPLY, bond_delete_reply)                                        \
+_(BOND_ENSLAVE_REPLY, bond_enslave_reply)                              \
+_(BOND_DETACH_SLAVE_REPLY, bond_detach_slave_reply)                    \
+_(SW_INTERFACE_BOND_DETAILS, sw_interface_bond_details)                 \
+_(SW_INTERFACE_SLAVE_DETAILS, sw_interface_slave_details)               \
 _(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply)                      \
 _(IP_TABLE_ADD_DEL_REPLY, ip_table_add_del_reply)                      \
 _(IP_MROUTE_ADD_DEL_REPLY, ip_mroute_add_del_reply)                    \
@@ -7953,6 +8228,194 @@ api_tap_delete_v2 (vat_main_t * vam)
   return ret;
 }
 
+/*
+ * VAT command: create a bond interface.
+ *
+ * Accepted arguments:
+ *   mode <bond-mode>     required
+ *   lb <load-balance>    only accepted once mode is lacp or xor
+ *   hw-addr <mac>        optional custom MAC address
+ *
+ * Returns -99 when the mode is missing, otherwise the value produced by
+ * the W() wait macro.
+ */
+static int
+api_bond_create (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_bond_create_t *mp;
+  u8 mac_address[6];
+  u8 custom_mac = 0;
+  int ret;
+  /*
+   * Must start out with a defined value: the "lb" branch below compares
+   * mode before "mode ..." is necessarily parsed.
+   */
+  u8 mode = 0;
+  u8 lb;
+  u8 mode_is_set = 0;
+
+  memset (mac_address, 0, sizeof (mac_address));
+  lb = BOND_LB_L2;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "mode %U", unformat_bond_mode, &mode))
+       mode_is_set = 1;
+      else if (((mode == BOND_MODE_LACP) || (mode == BOND_MODE_XOR))
+              && unformat (i, "lb %U", unformat_bond_load_balance, &lb))
+       ;
+      else if (unformat (i, "hw-addr %U", unformat_ethernet_address,
+                        mac_address))
+       custom_mac = 1;
+      else
+       break;
+    }
+
+  if (mode_is_set == 0)
+    {
+      errmsg ("Missing bond mode. ");
+      return -99;
+    }
+
+  /* Construct the API message */
+  M (BOND_CREATE, mp);
+
+  mp->use_custom_mac = custom_mac;
+
+  mp->mode = mode;
+  mp->lb = lb;
+
+  if (custom_mac)
+    clib_memcpy (mp->mac_address, mac_address, 6);
+
+  /* send it... */
+  S (mp);
+
+  /* Wait for a reply... */
+  W (ret);
+  return ret;
+}
+
+/*
+ * VAT command: delete a bond interface, given either by name or as
+ * "sw_if_index <id>".  Returns -99 when no interface was supplied,
+ * otherwise the value produced by the W() wait macro.
+ */
+static int
+api_bond_delete (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_bond_delete_t *mp;
+  u32 sw_if_index = ~0;
+  u8 sw_if_index_set = 0;
+  int ret;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)
+         || unformat (i, "sw_if_index %d", &sw_if_index))
+       sw_if_index_set = 1;
+      else
+       break;
+    }
+
+  if (!sw_if_index_set)
+    {
+      errmsg ("missing vpp interface name. ");
+      return -99;
+    }
+
+  /* Build and send the request */
+  M (BOND_DELETE, mp);
+  mp->sw_if_index = ntohl (sw_if_index);
+  S (mp);
+
+  /* Block until the reply arrives */
+  W (ret);
+  return ret;
+}
+
+/*
+ * VAT command: enslave an interface to a bond interface.
+ *
+ * Accepted arguments:
+ *   sw_if_index <n>      slave interface, required
+ *   bond <n>             bond interface sw_if_index, required
+ *   passive <0|1>        optional, defaults to 0
+ *   long-timeout <0|1>   optional, defaults to 0
+ *
+ * Returns -99 when a required argument is missing, otherwise the value
+ * produced by the W() wait macro.
+ */
+static int
+api_bond_enslave (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_bond_enslave_t *mp;
+  u32 bond_sw_if_index;
+  int ret;
+  /*
+   * "%d" stores an int-sized value, so the parse targets must be 32 bits
+   * wide; parsing straight into a u8 overwrites adjacent stack memory.
+   * Both flags default to 0 when the keyword is not given.
+   */
+  u32 is_passive = 0;
+  u32 is_long_timeout = 0;
+  u32 bond_sw_if_index_is_set = 0;
+  u32 sw_if_index;
+  u8 sw_if_index_is_set = 0;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "sw_if_index %d", &sw_if_index))
+       sw_if_index_is_set = 1;
+      else if (unformat (i, "bond %u", &bond_sw_if_index))
+       bond_sw_if_index_is_set = 1;
+      else if (unformat (i, "passive %d", &is_passive))
+       ;
+      else if (unformat (i, "long-timeout %d", &is_long_timeout))
+       ;
+      else
+       break;
+    }
+
+  if (bond_sw_if_index_is_set == 0)
+    {
+      errmsg ("Missing bond sw_if_index. ");
+      return -99;
+    }
+  if (sw_if_index_is_set == 0)
+    {
+      errmsg ("Missing slave sw_if_index. ");
+      return -99;
+    }
+
+  /* Construct the API message */
+  M (BOND_ENSLAVE, mp);
+
+  mp->bond_sw_if_index = ntohl (bond_sw_if_index);
+  mp->sw_if_index = ntohl (sw_if_index);
+  /* The message fields are single bytes: send the flags as 0 or 1 */
+  mp->is_long_timeout = (is_long_timeout != 0);
+  mp->is_passive = (is_passive != 0);
+
+  /* send it... */
+  S (mp);
+
+  /* Wait for a reply... */
+  W (ret);
+  return ret;
+}
+
+/*
+ * VAT command: detach a slave interface, given either by name or as
+ * "sw_if_index <id>".  Returns -99 when no interface was supplied,
+ * otherwise the value produced by the W() wait macro.
+ */
+static int
+api_bond_detach_slave (vat_main_t * vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_bond_detach_slave_t *mp;
+  u32 sw_if_index = ~0;
+  u8 sw_if_index_set = 0;
+  int ret;
+
+  /* Parse args required to build the message */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)
+         || unformat (i, "sw_if_index %d", &sw_if_index))
+       sw_if_index_set = 1;
+      else
+       break;
+    }
+
+  if (!sw_if_index_set)
+    {
+      errmsg ("missing vpp interface name. ");
+      return -99;
+    }
+
+  /* Build and send the request */
+  M (BOND_DETACH_SLAVE, mp);
+  mp->sw_if_index = ntohl (sw_if_index);
+  S (mp);
+
+  /* Block until the reply arrives */
+  W (ret);
+  return ret;
+}
+
 static int
 api_ip_table_add_del (vat_main_t * vam)
 {
@@ -22782,6 +23245,18 @@ _(tap_create_v2,                                                        \
 _(tap_delete_v2,                                                        \
   "<vpp-if-name> | sw_if_index <id>")                                   \
 _(sw_interface_tap_v2_dump, "")                                         \
+_(bond_create,                                                          \
+  "[hw-addr <mac-addr>] {round-robin | active-backup | "                \
+  "broadcast | {lacp | xor} [load-balance { l2 | l23 | l34 }]}")        \
+_(bond_delete,                                                          \
+  "<vpp-if-name> | sw_if_index <id>")                                   \
+_(bond_enslave,                                                         \
+  "sw_if_index <n> bond <sw_if_index> [is_passive] [is_long_timeout]") \
+_(bond_detach_slave,                                                    \
+  "sw_if_index <n>")                                                   \
+_(sw_interface_bond_dump, "")                                           \
+_(sw_interface_slave_dump,                                              \
+  "<vpp-if-name> | sw_if_index <id>")                                   \
 _(ip_table_add_del,                                                     \
   "table-id <n> [ipv6]\n")                                              \
 _(ip_add_del_route,                                                     \
index d201241..a58bdca 100644 (file)
@@ -272,6 +272,21 @@ nobase_include_HEADERS +=                  \
 
 API_FILES += vnet/geneve/geneve.api
 
+########################################
+# Layer 2 / Bonding
+########################################
+libvnet_la_SOURCES +=                          \
+  vnet/bonding/cli.c                           \
+  vnet/bonding/node.c                          \
+  vnet/bonding/device.c                        \
+  vnet/bonding/bond_api.c
+
+nobase_include_HEADERS +=                      \
+  vnet/bonding/node.h                           \
+  vnet/bonding/bond.api.h
+
+API_FILES += vnet/bonding/bond.api
+
 ########################################
 # Layer 2 / LLDP
 ########################################
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api
new file mode 100644 (file)
index 0000000..e8919e1
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+    This file defines vpe control-plane API messages for
+    the bonding device driver
+*/
+
+option version = "1.0.0";
+
+/** \brief Initialize a new bond interface with the given parameters
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param use_custom_mac - if set, mac_address is valid
+    @param mac_address - mac addr to assign to the interface if use_custom_mac is set
+    @param mode - mode, required (1=round-robin, 2=active-backup, 3=xor, 4=broadcast, 5=lacp)
+    @param lb - load balance, optional (0=l2, 1=l34, 2=l23) valid for xor and lacp modes. Otherwise ignored
+*/
+define bond_create
+{
+  u32 client_index;
+  u32 context;
+  u8 use_custom_mac;
+  u8 mac_address[6];
+  u8 mode;
+  u8 lb;
+};
+
+/** \brief Reply for bond create request
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param sw_if_index - software index allocated for the new bond interface
+*/
+define bond_create_reply
+{
+  u32 context;
+  i32 retval;
+  u32 sw_if_index;
+};
+
+/** \brief Delete bond interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index of the bond interface to delete
+*/
+autoreply define bond_delete
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+};
+
+/** \brief Enslave an interface to a bond interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - slave sw_if_index
+    @param bond_sw_if_index - bond sw_if_index
+    @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker
+    @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
+*/
+define bond_enslave
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  u32 bond_sw_if_index;
+  u8 is_passive;
+  u8 is_long_timeout;
+};
+
+/** \brief Reply for bond enslave reply
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+*/
+define bond_enslave_reply
+{
+  u32 context;
+  i32 retval;
+};
+
+/** \brief bond detach slave
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index of slave interface
+*/
+autoreply define bond_detach_slave
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+};
+
+/** \brief Dump bond interfaces request */
+define sw_interface_bond_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply for bond dump request
+    @param sw_if_index - software index of bond interface
+    @param interface_name - name of interface
+    @param mode - bonding mode
+    @param lb - load balance algo
+    @param active_slaves - active slaves count
+    @param slaves - config slave count
+*/
+define sw_interface_bond_details
+{
+  u32 context;
+  u32 sw_if_index;
+  u8 interface_name[64];
+  u8 mode;
+  u8 lb;
+  u32 active_slaves;
+  u32 slaves;
+};
+
+/** \brief bond slave dump
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface index of bond interface
+*/
+define sw_interface_slave_dump
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+};
+
+/** \brief Reply for slave dump request
+    @param sw_if_index - software index of slave interface
+    @param interface_name - name of interface
+    @param is_passive - interface does not initiate the lacp protocol, remote must be active speaker
+    @param is_long_timeout - 90 seconds vs default 3 seconds neighbor timeout
+*/
+define sw_interface_slave_details
+{
+  u32 context;
+  u32 sw_if_index;
+  u8 interface_name[64];
+  u8 is_passive;
+  u8 is_long_timeout;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
new file mode 100644 (file)
index 0000000..02536e9
--- /dev/null
@@ -0,0 +1,328 @@
+/*
+ *------------------------------------------------------------------
+ * bond_api.c - vnet bonding device driver API support
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs            /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun           /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/bonding/node.h>
+
+/*
+ * X-macro list of the bond API messages handled in this file.
+ * Each entry is _(MESSAGE_ID_SUFFIX, name_stem); bond_api_hookup() expands
+ * the list to register vl_api_<name_stem>_t_handler for VL_API_<SUFFIX>.
+ */
+#define foreach_bond_api_msg                     \
+_(BOND_CREATE, bond_create)                      \
+_(BOND_DELETE, bond_delete)                      \
+_(BOND_ENSLAVE, bond_enslave)                    \
+_(BOND_DETACH_SLAVE, bond_detach_slave)          \
+_(SW_INTERFACE_BOND_DUMP, sw_interface_bond_dump)\
+_(SW_INTERFACE_SLAVE_DUMP, sw_interface_slave_dump)
+
+/*
+ * Tell an API client that an interface has been deleted.
+ *
+ * Builds a zero-filled SW_INTERFACE_EVENT message for sw_if_index with the
+ * "deleted" flag set and admin/link state reported as down, then posts it
+ * on the client's shared-memory input queue q.  The am argument is unused.
+ */
+static void
+bond_send_sw_interface_event_deleted (vpe_api_main_t * am,
+                                      unix_shared_memory_queue_t * q,
+                                      u32 sw_if_index)
+{
+  vl_api_sw_interface_event_t *event = vl_msg_api_alloc (sizeof (*event));
+
+  memset (event, 0, sizeof (*event));
+  event->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_EVENT);
+  event->sw_if_index = ntohl (sw_if_index);
+
+  /* The interface no longer exists: flag deletion, report everything down. */
+  event->deleted = 1;
+  event->link_up_down = 0;
+  event->admin_up_down = 0;
+
+  vl_msg_api_send_shmem (q, (u8 *) & event);
+}
+
+/*
+ * API handler for BOND_DELETE.
+ *
+ * Deletes the bond interface named by mp->sw_if_index, replies with the
+ * result code and, when the deletion succeeded, also sends the client an
+ * interface-deleted event.  Nothing is sent if the client's input queue
+ * cannot be found.
+ */
+static void
+vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp)
+{
+  u32 bond_sw_if_index = ntohl (mp->sw_if_index);
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_delete_reply_t *reply;
+  int rc;
+
+  rc = bond_delete_if (vlib_get_main (), bond_sw_if_index);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->_vl_msg_id = ntohs (VL_API_BOND_DELETE_REPLY);
+  reply->context = mp->context;
+  reply->retval = ntohl (rc);
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+
+  if (rc == 0)
+    bond_send_sw_interface_event_deleted (&vpe_api_main, client_q,
+                                          bond_sw_if_index);
+}
+
+/*
+ * API handler for BOND_CREATE.
+ *
+ * Translates the request into bond_create_if_args_t, creates the bond
+ * interface and always answers with BOND_CREATE_REPLY.  The reply carries
+ * the return code from bond_create_if() and the sw_if_index it reported;
+ * clients must check retval before using sw_if_index.
+ */
+static void
+vl_api_bond_create_t_handler (vl_api_bond_create_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_bond_create_reply_t *rmp;
+  unix_shared_memory_queue_t *q;
+  bond_create_if_args_t _a, *ap = &_a;
+
+  memset (ap, 0, sizeof (*ap));
+
+  if (mp->use_custom_mac)
+    {
+      clib_memcpy (ap->hw_addr, mp->mac_address, 6);
+      ap->hw_addr_set = 1;
+    }
+
+  ap->mode = mp->mode;
+  ap->lb = mp->lb;
+  bond_create_if (vm, ap);
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (!q)
+    return;
+
+  /*
+   * Reply on failure as well as on success: a client waiting for the
+   * reply would otherwise never learn that the request was rejected.
+   */
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_BOND_CREATE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = ntohl (ap->rv);
+  rmp->sw_if_index = ntohl (ap->sw_if_index);
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * API handler for BOND_ENSLAVE.
+ *
+ * Enslaves mp->sw_if_index to the bond mp->bond_sw_if_index, passing the
+ * passive and long-timeout options through unchanged, then replies with
+ * the return code set by bond_enslave().
+ */
+static void
+vl_api_bond_enslave_t_handler (vl_api_bond_enslave_t * mp)
+{
+  bond_enslave_args_t enslave_args;
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_enslave_reply_t *reply;
+
+  memset (&enslave_args, 0, sizeof (enslave_args));
+  enslave_args.slave = ntohl (mp->sw_if_index);
+  enslave_args.group = ntohl (mp->bond_sw_if_index);
+  enslave_args.is_long_timeout = mp->is_long_timeout;
+  enslave_args.is_passive = mp->is_passive;
+
+  bond_enslave (vlib_get_main (), &enslave_args);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->_vl_msg_id = ntohs (VL_API_BOND_ENSLAVE_REPLY);
+  reply->context = mp->context;
+  reply->retval = ntohl (enslave_args.rv);
+
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+}
+
+/*
+ * API handler for BOND_DETACH_SLAVE.
+ *
+ * Detaches the slave interface mp->sw_if_index from its bond and replies
+ * with the return code set by bond_detach_slave().
+ */
+static void
+vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
+{
+  bond_detach_slave_args_t detach_args;
+  unix_shared_memory_queue_t *client_q;
+  vl_api_bond_detach_slave_reply_t *reply;
+
+  memset (&detach_args, 0, sizeof (detach_args));
+  detach_args.slave = ntohl (mp->sw_if_index);
+
+  bond_detach_slave (vlib_get_main (), &detach_args);
+
+  client_q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (client_q == 0)
+    return;
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->_vl_msg_id = ntohs (VL_API_BOND_DETACH_SLAVE_REPLY);
+  reply->context = mp->context;
+  reply->retval = htonl (detach_args.rv);
+
+  vl_msg_api_send_shmem (client_q, (u8 *) & reply);
+}
+
+/*
+ * Send one SW_INTERFACE_BOND_DETAILS message describing bond_if to the
+ * client registration reg.
+ *
+ * The interface name is copied into a zero-filled fixed-size field and is
+ * truncated so that at least one trailing zero byte always remains.
+ * context is stored as given.  The am argument is unused.
+ */
+static void
+bond_send_sw_interface_details (vpe_api_main_t * am,
+                                vl_api_registration_t * reg,
+                                bond_interface_details_t * bond_if,
+                                u32 context)
+{
+  vl_api_sw_interface_bond_details_t *details;
+  uword name_len;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = htons (VL_API_SW_INTERFACE_BOND_DETAILS);
+  details->context = context;
+  details->sw_if_index = htonl (bond_if->sw_if_index);
+  details->mode = bond_if->mode;
+  details->lb = bond_if->lb;
+  details->active_slaves = htonl (bond_if->active_slaves);
+  details->slaves = htonl (bond_if->slaves);
+
+  /* Clamp the copy so the last byte of the field stays zero. */
+  name_len = strlen ((const char *) bond_if->interface_name);
+  if (name_len > ARRAY_LEN (details->interface_name) - 1)
+    name_len = ARRAY_LEN (details->interface_name) - 1;
+  clib_memcpy (details->interface_name, bond_if->interface_name, name_len);
+
+  vl_api_send_msg (reg, (u8 *) details);
+}
+
+/*
+ * API handler for SW_INTERFACE_BOND_DUMP.
+ *
+ * Collects a snapshot of all bond interfaces with bond_dump_ifs() and
+ * sends one details message per bond to the requesting client.  Nothing
+ * is sent if the client registration is gone or the snapshot fails.
+ */
+static void
+vl_api_sw_interface_bond_dump_t_handler (vl_api_sw_interface_bond_dump_t * mp)
+{
+  bond_interface_details_t *snapshot = NULL;
+  vl_api_registration_t *client;
+  uword idx;
+
+  client = vl_api_client_index_to_registration (mp->client_index);
+  if (client == 0)
+    return;
+
+  if (bond_dump_ifs (&snapshot) != 0)
+    return;
+
+  for (idx = 0; idx < vec_len (snapshot); idx++)
+    bond_send_sw_interface_details (&vpe_api_main, client, &snapshot[idx],
+                                    mp->context);
+
+  vec_free (snapshot);
+}
+
+/*
+ * Send one SW_INTERFACE_SLAVE_DETAILS message describing slave_if to the
+ * client registration reg.
+ *
+ * The interface name is copied into a zero-filled fixed-size field and is
+ * truncated so that at least one trailing zero byte always remains.
+ * context is stored as given.  The am argument is unused.
+ */
+static void
+bond_send_sw_interface_slave_details (vpe_api_main_t * am,
+                                      vl_api_registration_t * reg,
+                                      slave_interface_details_t * slave_if,
+                                      u32 context)
+{
+  vl_api_sw_interface_slave_details_t *details;
+  uword name_len;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = htons (VL_API_SW_INTERFACE_SLAVE_DETAILS);
+  details->context = context;
+  details->sw_if_index = htonl (slave_if->sw_if_index);
+  details->is_passive = slave_if->is_passive;
+  details->is_long_timeout = slave_if->is_long_timeout;
+
+  /* Clamp the copy so the last byte of the field stays zero. */
+  name_len = strlen ((const char *) slave_if->interface_name);
+  if (name_len > ARRAY_LEN (details->interface_name) - 1)
+    name_len = ARRAY_LEN (details->interface_name) - 1;
+  clib_memcpy (details->interface_name, slave_if->interface_name, name_len);
+
+  vl_api_send_msg (reg, (u8 *) details);
+}
+
+/*
+ * API handler for SW_INTERFACE_SLAVE_DUMP.
+ *
+ * Collects the slaves of the bond mp->sw_if_index with
+ * bond_dump_slave_ifs() and sends one details message per slave.  Nothing
+ * is sent if the client registration is gone or the lookup fails.
+ */
+static void
+vl_api_sw_interface_slave_dump_t_handler (vl_api_sw_interface_slave_dump_t *
+                                          mp)
+{
+  slave_interface_details_t *snapshot = NULL;
+  vl_api_registration_t *client;
+  uword idx;
+
+  client = vl_api_client_index_to_registration (mp->client_index);
+  if (client == 0)
+    return;
+
+  if (bond_dump_slave_ifs (&snapshot, ntohl (mp->sw_if_index)) != 0)
+    return;
+
+  for (idx = 0; idx < vec_len (snapshot); idx++)
+    bond_send_sw_interface_slave_details (&vpe_api_main, client,
+                                          &snapshot[idx], mp->context);
+
+  vec_free (snapshot);
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register the "<name>_<crc>" string of every entry in
+ * foreach_vl_msg_name_crc_bond with the API main am, keyed by message id.
+ */
+static void
+bond_setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+  foreach_vl_msg_name_crc_bond;
+#undef _
+}
+
+/*
+ * API init function for the bond messages.
+ *
+ * For every entry in foreach_bond_api_msg this registers the message
+ * handler, a no-op cleanup handler and the generated endian and print
+ * functions, then populates the (msg_name, crc, message-id) table.
+ * Always returns 0; the vm argument is unused.
+ */
+static clib_error_t *
+bond_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+  /* NOTE(review): the trailing 1 presumably enables tracing - confirm. */
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_bond_api_msg;
+#undef _
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  bond_setup_message_id_table (am);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bond_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
new file mode 100644 (file)
index 0000000..b2d66f9
--- /dev/null
@@ -0,0 +1,706 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/bonding/node.h>
+
+/*
+ * Remove a slave from its bond's set of active slaves.
+ *
+ * Drops the first entry equal to sif->sw_if_index from the bond's
+ * active_slaves vector together with the matching hash entry.  Does
+ * nothing when the slave is not in the vector.  vm is unused.
+ */
+void
+bond_disable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_if_t *bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+  int idx;
+
+  vec_foreach_index (idx, bif->active_slaves)
+  {
+    uword active_sw_if_index = *vec_elt_at_index (bif->active_slaves, idx);
+
+    if (active_sw_if_index != sif->sw_if_index)
+      continue;
+
+    vec_del1 (bif->active_slaves, idx);
+    hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
+    break;
+  }
+}
+
+/*
+ * Add a slave to its bond's set of active slaves.
+ *
+ * The slave is recorded in both the active_slave_by_sw_if_index hash and
+ * the active_slaves vector; a slave already present in the hash is left
+ * untouched.  vm is unused.
+ */
+void
+bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif)
+{
+  bond_if_t *bif = bond_get_master_by_dev_instance (sif->bif_dev_instance);
+
+  /* Already active: nothing to do. */
+  if (hash_get (bif->active_slave_by_sw_if_index, sif->sw_if_index))
+    return;
+
+  hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index,
+            sif->sw_if_index);
+  vec_add1 (bif->active_slaves, sif->sw_if_index);
+}
+
+/*
+ * Build a vector describing every bond interface.
+ *
+ * One zero-initialised bond_interface_details_t is appended per bond in
+ * bond_main.interfaces and filled in with its index, name, mode, load
+ * balance setting and slave counts.  The vector (NULL when there are no
+ * bonds) is stored in *out_bondifs; the caller frees it with vec_free.
+ * Always returns 0.
+ */
+int
+bond_dump_ifs (bond_interface_details_t ** out_bondifs)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  bond_interface_details_t *result = NULL;
+  bond_interface_details_t *entry = NULL;
+  vnet_hw_interface_t *hw_if;
+  bond_if_t *bif;
+
+  /* *INDENT-OFF* */
+  pool_foreach (bif, bm->interfaces,
+  ({
+    vec_add2 (result, entry, 1);
+    memset (entry, 0, sizeof (*entry));
+
+    entry->sw_if_index = bif->sw_if_index;
+    entry->mode = bif->mode;
+    entry->lb = bif->lb;
+    entry->slaves = vec_len (bif->slaves);
+    entry->active_slaves = vec_len (bif->active_slaves);
+
+    /* Truncated copy; the zeroed entry keeps the name terminated. */
+    hw_if = vnet_get_hw_interface (vnm, bif->hw_if_index);
+    clib_memcpy (entry->interface_name, hw_if->name,
+                 MIN (ARRAY_LEN (entry->interface_name) - 1,
+                      strlen ((const char *) hw_if->name)));
+  }));
+  /* *INDENT-ON* */
+
+  *out_bondifs = result;
+
+  return 0;
+}
+
+/*
+ * Build a vector describing every slave of one bond interface.
+ *
+ * Returns 1, leaving *out_slaveifs untouched, when bond_sw_if_index is
+ * not a bond interface.  Otherwise one entry per element of the bond's
+ * slaves vector is appended, stored in *out_slaveifs, and 0 is returned.
+ * An entry whose slave cannot be looked up is left all-zero.  The caller
+ * frees the vector with vec_free.
+ */
+int
+bond_dump_slave_ifs (slave_interface_details_t ** out_slaveifs,
+                     u32 bond_sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  slave_interface_details_t *result = NULL;
+  slave_interface_details_t *entry = NULL;
+  vnet_hw_interface_t *hw_if;
+  vnet_sw_interface_t *sw_if;
+  u32 *slave_index = NULL;
+  slave_if_t *sif;
+  bond_if_t *bif;
+
+  bif = bond_get_master_by_sw_if_index (bond_sw_if_index);
+  if (bif == 0)
+    return 1;
+
+  vec_foreach (slave_index, bif->slaves)
+  {
+    vec_add2 (result, entry, 1);
+    memset (entry, 0, sizeof (*entry));
+
+    sif = bond_get_slave_by_sw_if_index (*slave_index);
+    if (sif == 0)
+      continue;                 /* entry stays zeroed */
+
+    entry->sw_if_index = sif->sw_if_index;
+    entry->is_passive = sif->is_passive;
+    entry->is_long_timeout = sif->is_long_timeout;
+
+    sw_if = vnet_get_sw_interface (vnm, sif->sw_if_index);
+    hw_if = vnet_get_hw_interface (vnm, sw_if->hw_if_index);
+    clib_memcpy (entry->interface_name, hw_if->name,
+                 MIN (ARRAY_LEN (entry->interface_name) - 1,
+                      strlen ((const char *) hw_if->name)));
+  }
+
+  *out_slaveifs = result;
+
+  return 0;
+}
+
+/*
+ * Detach slave sif from bond bif and release the slave's state.
+ *
+ * Clears the slave's port number bit, removes it from the neighbor hash
+ * and from the bond's slaves / active_slaves sets, frees its saved
+ * packets, restores the MAC address it had before being enslaved and,
+ * for LACP bonds, invokes the registered LACP disable callback.  The
+ * slave is returned to the neighbor pool last, so sif must not be used
+ * by the caller afterwards.
+ */
+static void
+bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  int i;
+  vnet_hw_interface_t *hw;
+
+  bif->port_number_bitmap =
+    clib_bitmap_set (bif->port_number_bitmap,
+                     ntohs (sif->actor_admin.port_number) - 1, 0);
+  hash_unset (bm->neighbor_by_sw_if_index, sif->sw_if_index);
+  vec_free (sif->last_marker_pkt);
+  vec_free (sif->last_rx_pkt);
+  vec_foreach_index (i, bif->slaves)
+  {
+    uword p = *vec_elt_at_index (bif->slaves, i);
+    if (p == sif->sw_if_index)
+      {
+        vec_del1 (bif->slaves, i);
+        break;
+      }
+  }
+
+  bond_disable_collecting_distributing (vm, sif);
+
+  /* Put back the old mac */
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+  vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                        sif->persistent_hw_address);
+
+  /*
+   * Run the LACP teardown while sif still belongs to the pool; handing
+   * the callback an element that was already returned with pool_put is a
+   * use-after-free of the pool entry.
+   */
+  if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable)
+    (*bm->lacp_enable_disable) (vm, bif, sif, 0);
+
+  pool_put (bm->neighbors, sif);
+}
+
+/*
+ * Delete a bond interface and detach all of its slaves.
+ *
+ * @param vm           vlib main
+ * @param sw_if_index  software interface index of the bond to delete
+ * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when
+ *         sw_if_index does not exist or is not a bond interface
+ */
+int
+bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_if_t *bif;
+  slave_if_t *sif;
+  vnet_hw_interface_t *hw;
+  u32 *sif_sw_if_index;
+  u32 *slaves;
+
+  /* sw_if_index may come straight from an API client: validate it first. */
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (hw == NULL || bond_dev_class.index != hw->dev_class_index)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  bif = bond_get_master_by_dev_instance (hw->dev_instance);
+
+  /*
+   * bond_delete_neighbor() removes entries from bif->slaves with
+   * vec_del1, which moves the last element into the freed slot.  Walking
+   * the live vector would therefore skip slaves, so walk a private copy.
+   */
+  slaves = vec_dup (bif->slaves);
+  vec_foreach (sif_sw_if_index, slaves)
+  {
+    sif = bond_get_slave_by_sw_if_index (*sif_sw_if_index);
+    if (sif)
+      bond_delete_neighbor (vm, bif, sif);
+  }
+  vec_free (slaves);
+
+  /* bring down the interface */
+  vnet_hw_interface_set_flags (vnm, bif->hw_if_index, 0);
+  vnet_sw_interface_set_flags (vnm, bif->sw_if_index, 0);
+
+  ethernet_delete_interface (vnm, bif->hw_if_index);
+
+  /* Release per-bond containers before the memset drops the pointers. */
+  vec_free (bif->slaves);
+  vec_free (bif->active_slaves);
+  hash_free (bif->active_slave_by_sw_if_index);
+  clib_bitmap_free (bif->port_number_bitmap);
+  hash_unset (bm->bond_by_sw_if_index, bif->sw_if_index);
+  memset (bif, 0, sizeof (*bif));
+  pool_put (bm->interfaces, bif);
+
+  return 0;
+}
+
+/*
+ * Create a bond interface from args.
+ *
+ * On failure args->rv and args->error are set and nothing is created:
+ *   - LACP mode requested while the LACP plugin is not loaded,
+ *   - mode outside [BOND_MODE_ROUND_ROBIN, BOND_MODE_LACP],
+ *   - load balance above BOND_LB_L23,
+ *   - ethernet interface registration failure.
+ * On success args->sw_if_index holds the new interface index.  When no
+ * MAC address was supplied, one of the form 02:fe:xx:xx:xx:xx is
+ * generated and written back into args->hw_addr.
+ */
+void
+bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  vnet_sw_interface_t *sw_if;
+  bond_if_t *bond;
+
+  /* Validate everything before allocating a pool entry. */
+  if (args->mode == BOND_MODE_LACP && !bm->lacp_plugin_loaded)
+    {
+      args->rv = VNET_API_ERROR_FEATURE_DISABLED;
+      args->error = clib_error_return (0, "LACP plugin is not loaded");
+      return;
+    }
+  if (args->mode < BOND_MODE_ROUND_ROBIN || args->mode > BOND_MODE_LACP)
+    {
+      args->rv = VNET_API_ERROR_INVALID_ARGUMENT;
+      args->error = clib_error_return (0, "Invalid mode");
+      return;
+    }
+  if (args->lb > BOND_LB_L23)
+    {
+      args->rv = VNET_API_ERROR_INVALID_ARGUMENT;
+      args->error = clib_error_return (0, "Invalid load-balance");
+      return;
+    }
+
+  pool_get (bm->interfaces, bond);
+  memset (bond, 0, sizeof (*bond));
+  bond->dev_instance = bond - bm->interfaces;
+  bond->mode = args->mode;
+  bond->lb = args->lb;
+  bond->use_custom_mac = args->hw_addr_set;
+
+  /* Round-robin and broadcast use dedicated load-balance values. */
+  switch (bond->mode)
+    {
+    case BOND_MODE_ROUND_ROBIN:
+      bond->lb = BOND_LB_RR;
+      break;
+    case BOND_MODE_BROADCAST:
+      bond->lb = BOND_LB_BC;
+      break;
+    default:
+      break;
+    }
+
+  if (!args->hw_addr_set)
+    {
+      /* Derive a MAC of the form 02:fe:<4 random bytes> from the clock. */
+      u32 seed = (u32) (vlib_time_now (vm) * 1e6);
+      u32 rnd = random_u32 (&seed);
+
+      memcpy (args->hw_addr + 2, &rnd, sizeof (rnd));
+      args->hw_addr[0] = 2;
+      args->hw_addr[1] = 0xfe;
+    }
+  memcpy (bond->hw_address, args->hw_addr, 6);
+
+  args->error = ethernet_register_interface
+    (vnm, bond_dev_class.index, bond - bm->interfaces /* device instance */ ,
+     bond->hw_address /* ethernet address */ ,
+     &bond->hw_if_index, 0 /* flag change */ );
+  if (args->error)
+    {
+      args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
+      pool_put (bm->interfaces, bond);
+      return;
+    }
+
+  sw_if = vnet_get_hw_sw_interface (vnm, bond->hw_if_index);
+  bond->sw_if_index = sw_if->sw_if_index;
+  bond->group = bond->sw_if_index;
+
+  vnet_hw_interface_set_flags (vnm, bond->hw_if_index,
+                               VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+  hash_set (bm->bond_by_sw_if_index, bond->sw_if_index, bond->dev_instance);
+
+  /* Report the new interface back to the caller. */
+  args->sw_if_index = bond->sw_if_index;
+}
+
+/*
+ * CLI handler for "create bond".
+ *
+ * Parses "mode <mode>", an optional "load-balance <lb>" (accepted only
+ * once an lacp or xor mode has been parsed) and an optional
+ * "hw-addr <mac>", then creates the bond with bond_create_if().
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  bond_create_if_args_t args = { 0 };
+  u8 mode_is_set = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing required arguments.");
+
+  args.mode = -1;
+  args.lb = BOND_LB_L2;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "mode %U", unformat_bond_mode, &args.mode))
+        mode_is_set = 1;
+      else if (((args.mode == BOND_MODE_LACP) || (args.mode == BOND_MODE_XOR))
+               && unformat (line_input, "load-balance %U",
+                            unformat_bond_load_balance, &args.lb))
+        ;
+      else if (unformat (line_input, "hw-addr %U",
+                         unformat_ethernet_address, args.hw_addr))
+        args.hw_addr_set = 1;
+      else
+        {
+          /*
+           * Report the text left in the line being parsed, and leave the
+           * loop instead of returning so line_input is always freed.
+           */
+          args.error = clib_error_return (0, "unknown input `%U'",
+                                          format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (args.error)
+    return args.error;
+  if (mode_is_set == 0)
+    return clib_error_return (0, "Missing bond mode");
+
+  bond_create_if (vm, &args);
+
+  return args.error;
+}
+
+/* CLI registration: "create bond" is handled by bond_create_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bond_create_command, static) = {
+  .path = "create bond",
+  .short_help = "create bond mode {round-robin | active-backup | broadcast | "
+    "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>]",
+  .function = bond_create_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "delete bond".
+ *
+ * Accepts either an interface name or "sw_if_index <n>" and deletes the
+ * named bond interface with bond_delete_if().
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  int rv;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing <interface>");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+        ;
+      else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+                         vnm, &sw_if_index))
+        ;
+      else
+        {
+          /*
+           * Report the text left in the line being parsed, and leave the
+           * loop instead of returning so line_input is always freed.
+           */
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0,
+                              "please specify interface name or sw_if_index");
+
+  rv = bond_delete_if (vm, sw_if_index);
+  if (rv == VNET_API_ERROR_INVALID_SW_IF_INDEX)
+    return clib_error_return (0, "not a bond interface");
+  else if (rv != 0)
+    return clib_error_return (0, "error on deleting bond interface");
+
+  return 0;
+}
+
+/* CLI registration: "delete bond" is handled by bond_delete_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (bond_delete__command, static) =
+{
+  .path = "delete bond",
+  .short_help = "delete bond {<interface> | sw_if_index <sw_idx>}",
+  .function = bond_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Enslave interface args->slave to the bond interface args->group.
+ *
+ * On failure args->rv and args->error are set:
+ *   - args->group is not a bond interface,
+ *   - args->slave does not exist, is already enslaved, or is itself a
+ *     bond interface,
+ *   - enabling the "bond-input" feature on the slave fails (the slave has
+ *     already been added to the bond at that point).
+ *
+ * The slave's current MAC address is saved so it can be restored on
+ * detach.  With a custom bond MAC every slave takes the bond's address;
+ * otherwise the bond adopts the first slave's address and later slaves
+ * take the bond's address.  For LACP bonds the registered LACP enable
+ * callback is invoked; in other modes the slave becomes active at once.
+ */
+void
+bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_if_t *bif;
+  slave_if_t *sif;
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_hw_interface_t *hw, *hw2;
+  vnet_sw_interface_t *sw;
+
+  bif = bond_get_master_by_sw_if_index (args->group);
+  if (!bif)
+    {
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error = clib_error_return (0, "bond interface not found");
+      return;
+    }
+  /*
+   * args->slave may come straight from an API client; reject indices that
+   * do not name an existing interface before dereferencing them.
+   */
+  if (pool_is_free_index (im->sw_interfaces, args->slave))
+    {
+      args->rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+      args->error = clib_error_return (0, "invalid slave interface");
+      return;
+    }
+  // make sure the interface is not already enslaved
+  if (bond_get_slave_by_sw_if_index (args->slave))
+    {
+      args->rv = VNET_API_ERROR_VALUE_EXIST;
+      args->error = clib_error_return (0, "interface was already enslaved");
+      return;
+    }
+  hw = vnet_get_sup_hw_interface (vnm, args->slave);
+  if (hw->dev_class_index == bond_dev_class.index)
+    {
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error =
+        clib_error_return (0, "bond interface cannot be enslaved");
+      return;
+    }
+  pool_get (bm->neighbors, sif);
+  memset (sif, 0, sizeof (*sif));
+  clib_spinlock_init (&sif->lockp);
+  sw = pool_elt_at_index (im->sw_interfaces, args->slave);
+  sif->port_enabled = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+  sif->sw_if_index = sw->sw_if_index;
+  sif->hw_if_index = sw->hw_if_index;
+  sif->packet_template_index = (u8) ~ 0;
+  sif->is_passive = args->is_passive;
+  sif->group = args->group;
+  sif->bif_dev_instance = bif->dev_instance;
+  sif->mode = bif->mode;
+
+  sif->is_long_timeout = args->is_long_timeout;
+  if (args->is_long_timeout)
+    sif->ttl_in_seconds = LACP_LONG_TIMOUT_TIME;
+  else
+    sif->ttl_in_seconds = LACP_SHORT_TIMOUT_TIME;
+
+  hash_set (bm->neighbor_by_sw_if_index, sif->sw_if_index,
+            sif - bm->neighbors);
+  vec_add1 (bif->slaves, sif->sw_if_index);
+
+  hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index);
+  /* Save the old mac */
+  memcpy (sif->persistent_hw_address, hw->hw_address, 6);
+  if (bif->use_custom_mac)
+    {
+      vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                            bif->hw_address);
+    }
+  else
+    {
+      // bond interface gets the mac address from the first slave
+      if (vec_len (bif->slaves) == 1)
+        {
+          memcpy (bif->hw_address, hw->hw_address, 6);
+          hw2 = vnet_get_sup_hw_interface (vnm, bif->sw_if_index);
+          vnet_hw_interface_change_mac_address (vnm, hw2->hw_if_index,
+                                                hw->hw_address);
+        }
+      else
+        {
+          // subsequent slaves gets the mac address of the bond interface
+          vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                                bif->hw_address);
+        }
+    }
+
+  if ((bif->mode == BOND_MODE_LACP) && bm->lacp_enable_disable)
+    {
+      (*bm->lacp_enable_disable) (vm, bif, sif, 1);
+    }
+  else
+    {
+      bond_enable_collecting_distributing (vm, sif);
+    }
+
+  args->rv = vnet_feature_enable_disable ("device-input", "bond-input",
+                                          hw->hw_if_index, 1, 0, 0);
+
+  if (args->rv)
+    {
+      args->error =
+        clib_error_return (0,
+                           "Error encountered on input feature arc enable");
+    }
+}
+
+/*
+ * CLI handler for "enslave".
+ *
+ * Parses "interface <slave>", "to <bond>" and the optional "passive" and
+ * "long-timeout" keywords, then enslaves the interface via bond_enslave().
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+enslave_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  bond_enslave_args_t args = { 0 };
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing required arguments.");
+
+  args.slave = ~0;
+  args.group = ~0;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "interface %U",
+                    unformat_vnet_sw_interface, vnm, &args.slave))
+        ;
+      else if (unformat (line_input, "to %U", unformat_vnet_sw_interface, vnm,
+                         &args.group))
+        ;
+      else if (unformat (line_input, "passive"))
+        args.is_passive = 1;
+      else if (unformat (line_input, "long-timeout"))
+        args.is_long_timeout = 1;
+      else
+        {
+          /* Report the text left in the line being parsed. */
+          args.error = clib_error_return (0, "unknown input `%U'",
+                                          format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (args.error)
+    return args.error;
+  if (args.group == ~0)
+    return clib_error_return (0, "Missing bond interface");
+  if (args.slave == ~0)
+    return clib_error_return (0, "please specify valid interface name");
+
+  bond_enslave (vm, &args);
+
+  return args.error;
+}
+
+/* CLI registration: "enslave" is handled by enslave_interface_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enslave_interface_command, static) = {
+  .path = "enslave",
+  .short_help = "enslave interface <interface> to <BondEthernetx> [passive] [long-timeout]",
+  .function = enslave_interface_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Detach the slave interface args->slave from the bond it belongs to.
+ *
+ * Sets args->rv and args->error when the interface is not enslaved;
+ * otherwise removes it through bond_delete_neighbor().
+ */
+void
+bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args)
+{
+  slave_if_t *slave = bond_get_slave_by_sw_if_index (args->slave);
+
+  if (slave == 0)
+    {
+      args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+      args->error = clib_error_return (0, "interface was not enslaved");
+      return;
+    }
+
+  bond_delete_neighbor (vm,
+                        bond_get_master_by_dev_instance
+                        (slave->bif_dev_instance), slave);
+}
+
+/*
+ * CLI handler for "detach".
+ *
+ * Parses "interface <slave>" and detaches that interface from its bond
+ * via bond_detach_slave().
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  bond_detach_slave_args_t args = { 0 };
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing required arguments.");
+
+  args.slave = ~0;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "interface %U",
+                    unformat_vnet_sw_interface, vnm, &args.slave))
+        ;
+      else
+        {
+          /* Report the text left in the line being parsed. */
+          args.error = clib_error_return (0, "unknown input `%U'",
+                                          format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (args.error)
+    return args.error;
+  if (args.slave == ~0)
+    return clib_error_return (0, "please specify valid interface name");
+
+  bond_detach_slave (vm, &args);
+
+  return args.error;
+}
+
+/* CLI registration: "detach" is handled by detach_interface_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (detach_interface_command, static) = {
+  .path = "detach",
+  .short_help = "detach interface <interface>",
+  .function = detach_interface_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Print a one-line-per-bond summary table: name, sw_if_index, mode, load
+ * balance setting, number of active slaves and number of slaves.
+ */
+static void
+show_bond (vlib_main_t * vm)
+{
+  bond_if_t *bond;
+
+  /* Column headers; widths match the per-bond format string below. */
+  vlib_cli_output (vm, "%-16s %-12s %-12s %-13s %-14s %s",
+                   "interface name", "sw_if_index", "mode",
+                   "load balance", "active slaves", "slaves");
+
+  /* *INDENT-OFF* */
+  pool_foreach (bond, bond_main.interfaces,
+  ({
+    vlib_cli_output (vm, "%-16U %-12d %-12U %-13U %-14u %u",
+                     format_bond_interface_name, bond->dev_instance,
+                     bond->sw_if_index,
+                     format_bond_mode, bond->mode,
+                     format_bond_load_balance, bond->lb,
+                     vec_len (bond->active_slaves),
+                     vec_len (bond->slaves));
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Print a multi-line description of every bond: mode, load balance
+ * setting, the active slaves and all slaves by name, and the bond's
+ * device instance and interface indices.  Round-robin bonds additionally
+ * show the last transmit slave index.
+ */
+static void
+show_bond_details (vlib_main_t * vm)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  bond_main_t *bm = &bond_main;
+  bond_if_t *bond;
+  u32 *member;
+
+  /* *INDENT-OFF* */
+  pool_foreach (bond, bm->interfaces,
+  ({
+    vlib_cli_output (vm, "%U", format_bond_interface_name,
+                     bond->dev_instance);
+    vlib_cli_output (vm, "  mode: %U", format_bond_mode, bond->mode);
+    vlib_cli_output (vm, "  load balance: %U",
+                     format_bond_load_balance, bond->lb);
+    if (bond->mode == BOND_MODE_ROUND_ROBIN)
+      vlib_cli_output (vm, "  last xmit slave index: %u",
+                       bond->lb_rr_last_index);
+
+    vlib_cli_output (vm, "  number of active slaves: %d",
+                     vec_len (bond->active_slaves));
+    vec_foreach (member, bond->active_slaves)
+      vlib_cli_output (vm, "    %U", format_vnet_sw_if_index_name,
+                       vnm, *member);
+
+    vlib_cli_output (vm, "  number of slaves: %d", vec_len (bond->slaves));
+    vec_foreach (member, bond->slaves)
+      vlib_cli_output (vm, "    %U", format_vnet_sw_if_index_name,
+                       vnm, *member);
+
+    vlib_cli_output (vm, "  device instance: %d", bond->dev_instance);
+    vlib_cli_output (vm, "  sw_if_index: %d", bond->sw_if_index);
+    vlib_cli_output (vm, "  hw_if_index: %d", bond->hw_if_index);
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * CLI handler for "show bond [details]".
+ *
+ * Prints the summary table by default, or the detailed per-bond listing
+ * when the "details" keyword is present.  Any other input is rejected.
+ */
+static clib_error_t *
+show_bond_fn (vlib_main_t * vm, unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+  u8 want_details = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "details"))
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+      want_details = 1;
+    }
+
+  if (want_details)
+    show_bond_details (vm);
+  else
+    show_bond (vm);
+
+  return 0;
+}
+
+/* CLI registration: "show bond" is handled by show_bond_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_bond_command, static) = {
+  .path = "show bond",
+  .short_help = "show bond [details]",
+  .function = show_bond_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function for the bonding CLI: records the vlib and vnet mains in
+ * bond_main and creates the neighbor_by_sw_if_index hash.
+ * Always returns 0.
+ */
+clib_error_t *
+bond_cli_init (vlib_main_t * vm)
+{
+  bond_main.vlib_main = vm;
+  bond_main.vnet_main = vnet_get_main ();
+  bond_main.neighbor_by_sw_if_index = hash_create (0, sizeof (uword));
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (bond_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
new file mode 100644 (file)
index 0000000..8f9b3a9
--- /dev/null
@@ -0,0 +1,610 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/bonding/node.h>
+
+/* X-macro list of bond TX errors: _(enum suffix, counter description). */
+#define foreach_bond_tx_error     \
+  _(NONE, "no error")             \
+  _(IF_DOWN, "interface down")    \
+  _(NO_SLAVE, "no slave")
+
+/* Error codes BOND_TX_ERROR_<suffix>, generated from the list above;
+ * BOND_TX_N_ERROR is the number of entries. */
+typedef enum
+{
+#define _(f,s) BOND_TX_ERROR_##f,
+  foreach_bond_tx_error
+#undef _
+    BOND_TX_N_ERROR,
+} bond_tx_error_t;
+
+/* Description strings, generated from the same list and therefore in the
+ * same order as bond_tx_error_t. */
+static char *bond_tx_error_strings[] = {
+#define _(n,s) s,
+  foreach_bond_tx_error
+#undef _
+};
+
+/*
+ * Format one bond TX trace record as
+ *   "src <mac>, dst <mac>, <interface> -> <interface>"
+ *
+ * args: vlib_main_t * (unused), vlib_node_t * (unused),
+ *       bond_packet_trace_t * (the record to print).
+ * Returns the format vector s with the text appended.
+ */
+static u8 *
+format_bond_tx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);
+  vnet_hw_interface_t *hw, *hw1;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index);
+  hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index);
+
+  /*
+   * Interface names are vppinfra vectors built with format() and carry no
+   * guaranteed NUL terminator, so they are printed with %v (length-aware)
+   * rather than %s, which would read until it happens to hit a zero byte.
+   */
+  s = format (s, "src %U, dst %U, %v -> %v",
+             format_ethernet_address, t->ethernet.src_address,
+             format_ethernet_address, t->ethernet.dst_address,
+             hw->name, hw1->name);
+
+  return s;
+}
+
+/*
+ * Device-name formatter for bond interfaces: looks up the bond by the
+ * u32 device instance taken from args and appends
+ * "BondEthernet<dev_instance>" to s.
+ */
+u8 *
+format_bond_interface_name (u8 * s, va_list * args)
+{
+  u32 instance = va_arg (*args, u32);
+  bond_if_t *bif = pool_elt_at_index (bond_main.interfaces, instance);
+
+  return format (s, "BondEthernet%lu", bif->dev_instance);
+}
+
+/*
+ * Sub-interface add/delete callback for the bond device class.
+ * Currently a stub: no per-sub-interface state is kept, so it simply
+ * reports success (a null error pointer).
+ */
+static __clib_unused clib_error_t *
+bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index,
+                            struct vnet_sw_interface_t *st, int is_add)
+{
+  return 0;
+}
+
+/*
+ * Admin up/down callback for the bond device class.
+ *
+ * Records the new admin state in the bond and, when the bond is brought
+ * up while it already has at least one active slave, marks the bond's
+ * hardware link as up.  Nothing else is changed on admin down.
+ * Always returns 0.
+ */
+static clib_error_t *
+bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  bond_main_t *bm = &bond_main;
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  bond_if_t *bif = pool_elt_at_index (bm->interfaces, hw->dev_instance);
+  uword admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+  bif->admin_up = admin_up;
+
+  if (!admin_up || !vec_len (bif->active_slaves))
+    return 0;
+
+  vnet_hw_interface_set_flags (vnm, bif->hw_if_index,
+                              VNET_HW_INTERFACE_FLAG_LINK_UP);
+  return 0;
+}
+
+/*
+ * Broadcast mode: send a copy of b0 to every active slave except the
+ * first, and return 0 so that the caller transmits the original buffer
+ * on active slave 0.
+ *
+ * The buffer is copied BEFORE a frame is requested for the slave.  The
+ * previous order fetched the frame first and, when the copy failed,
+ * never handed that frame back with vnet_put_frame_to_sw_interface().
+ * A slave whose copy fails is simply skipped.
+ */
+static inline u32
+bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node,
+                            bond_if_t * bif, vlib_buffer_t * b0)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_buffer_t *c0;
+  int i;
+  u32 *to_next = 0;
+  u32 sw_if_index;
+  vlib_frame_t *f;
+
+  /* Slave 0 gets the original buffer, so start at index 1. */
+  for (i = 1; i < vec_len (bif->active_slaves); i++)
+    {
+      sw_if_index = *vec_elt_at_index (bif->active_slaves, i);
+
+      c0 = vlib_buffer_copy (vm, b0);
+      if (PREDICT_FALSE (c0 == 0))
+       continue;               /* out of buffers: skip this slave */
+
+      f = vnet_get_frame_to_sw_interface (vnm, sw_if_index);
+      to_next = vlib_frame_vector_args (f);
+      to_next += f->n_vectors;
+
+      vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
+      to_next[0] = vlib_get_buffer_index (vm, c0);
+      f->n_vectors++;
+      vnet_put_frame_to_sw_interface (vnm, sw_if_index, f);
+    }
+
+  return 0;
+}
+
+/*
+ * Layer-2 transmit hash.
+ *
+ * Folds each 6-byte MAC address into 32 bits (first four bytes XOR last
+ * two bytes), mixes source and destination with hash_v3 and returns the
+ * result modulo the number of active slaves.  The caller must make sure
+ * the bond has at least one active slave.
+ */
+static inline u32
+bond_load_balance_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                     bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u32 src_head, dst_head;
+  u16 src_tail, dst_tail;
+  u32 a, b, c = 0;
+
+  /* memcpy keeps the loads alignment-safe. */
+  memcpy (&src_head, &eth->src_address[0], sizeof (src_head));
+  memcpy (&src_tail, &eth->src_address[4], sizeof (src_tail));
+  memcpy (&dst_head, &eth->dst_address[0], sizeof (dst_head));
+  memcpy (&dst_tail, &eth->dst_address[4], sizeof (dst_tail));
+
+  a = src_head ^ src_tail;
+  b = dst_head ^ dst_tail;
+
+  hash_v3_mix32 (a, b, c);
+  hash_v3_finalize32 (a, b, c);
+
+  return c % vec_len (bif->active_slaves);
+}
+
+/*
+ * Return a pointer to the ethertype that describes the payload.
+ *
+ * Untagged frame: the type field of the ethernet header itself.
+ * Tagged frame:   the type field of the first VLAN header, or of the
+ *                 second one when the first announces another VLAN tag.
+ * The value pointed to is still in network byte order.
+ */
+static inline u16 *
+bond_locate_ethertype (ethernet_header_t * eth)
+{
+  ethernet_vlan_header_t *tag;
+
+  if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
+    return &eth->type;
+
+  tag = (void *) (eth + 1);
+  if (tag->type == ntohs (ETHERNET_TYPE_VLAN))
+    tag++;                     /* double tagged: step to the inner tag */
+
+  return &tag->type;
+}
+
+/*
+ * Layer 2+3 transmit hash.
+ *
+ * Non-IP frames fall back to the layer-2 hash.  For IPv4 the folded MAC
+ * addresses and the XOR of the IP addresses are mixed; for IPv6 the XOR
+ * of the MAC addresses and the folded source/destination addresses are
+ * mixed.  Returns the hash modulo the number of active slaves; the
+ * caller must make sure the bond has at least one active slave.
+ *
+ * The IPv6 header is located right after the ethertype returned by
+ * bond_locate_ethertype(), exactly like the IPv4 header.  It used to be
+ * taken at (eth + 1), which is only correct for untagged frames: with
+ * VLAN tags the hash covered tag bytes and per-packet fields such as the
+ * payload length, so packets of one flow could pick different slaves.
+ */
+static inline u32
+bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u8 ip_version;
+  ip4_header_t *ip4;
+  u16 ethertype, *ethertype_p;
+
+  ethertype_p = bond_locate_ethertype (eth);
+  ethertype = *ethertype_p;
+
+  if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+      (ethertype != htons (ETHERNET_TYPE_IP6)))
+    return (bond_load_balance_l2 (vm, node, bif, b0));
+
+  /* The L3 header starts immediately after the (innermost) ethertype. */
+  ip4 = (ip4_header_t *) (ethertype_p + 1);
+  ip_version = (ip4->ip_version_and_header_length >> 4);
+
+  if (ip_version == 0x4)
+    {
+      u16 t11, t22;
+      u32 a = 0, b = 0, c = 0, t1, t2;
+
+      memcpy (&t1, eth->src_address, sizeof (t1));
+      memcpy (&t11, &eth->src_address[4], sizeof (t11));
+      a = t1 ^ t11;
+
+      memcpy (&t2, eth->dst_address, sizeof (t2));
+      memcpy (&t22, &eth->dst_address[4], sizeof (t22));
+      b = t2 ^ t22;
+
+      c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32;
+
+      hash_v3_mix32 (a, b, c);
+      hash_v3_finalize32 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+  else if (ip_version == 0x6)
+    {
+      u64 a, b, c;
+      u64 t1 = 0, t2 = 0;
+      ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1);
+
+      /* Only 6 of the 8 bytes are written; the rest stay zero. */
+      memcpy (&t1, eth->src_address, sizeof (eth->src_address));
+      memcpy (&t2, eth->dst_address, sizeof (eth->dst_address));
+      a = t1 ^ t2;
+
+      b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]);
+      c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]);
+
+      hash_mix64 (a, b, c);
+      return c % vec_len (bif->active_slaves);
+    }
+
+  /* Ethertype said IP but the version nibble is neither 4 nor 6. */
+  return (bond_load_balance_l2 (vm, node, bif, b0));
+}
+
+/*
+ * Layer 3+4 transmit hash.
+ *
+ * Non-IP frames fall back to the layer-2 hash.  For IP frames the
+ * addresses are mixed with the TCP/UDP source and destination ports
+ * (ports count as 0 for other protocols).  For IPv6 a hop-by-hop options
+ * header directly followed by TCP/UDP is skipped to reach the ports.
+ * Returns the hash modulo the number of active slaves; the caller must
+ * make sure the bond has at least one active slave.
+ *
+ * Two header-location fixes compared with the previous code:
+ *  - IPv4: the L4 header is found via the IHL field instead of assuming
+ *    a 20-byte header, so IPv4 options are not mistaken for ports.
+ *  - IPv6: the header is located after the ethertype returned by
+ *    bond_locate_ethertype() instead of at (eth + 1), which was wrong
+ *    for VLAN-tagged frames.
+ */
+static inline u32
+bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+  u8 ip_version;
+  uword is_tcp_udp = 0;
+  ip4_header_t *ip4;
+  u16 ethertype, *ethertype_p;
+
+  ethertype_p = bond_locate_ethertype (eth);
+  ethertype = *ethertype_p;
+
+  if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+      (ethertype != htons (ETHERNET_TYPE_IP6)))
+    return (bond_load_balance_l2 (vm, node, bif, b0));
+
+  /* The L3 header starts immediately after the (innermost) ethertype. */
+  ip4 = (ip4_header_t *) (ethertype_p + 1);
+  ip_version = (ip4->ip_version_and_header_length >> 4);
+
+  if (ip_version == 0x4)
+    {
+      u32 a = 0, b = 0, c = 0, t1, t2;
+      /* IHL (low nibble) counts 32-bit words of IPv4 header. */
+      u32 ip4_hdr_bytes = (ip4->ip_version_and_header_length & 0xf) << 2;
+      tcp_header_t *tcp = (void *) ((u8 *) ip4 + ip4_hdr_bytes);
+
+      is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
+       (ip4->protocol == IP_PROTOCOL_UDP);
+
+      a = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32;
+
+      t1 = is_tcp_udp ? tcp->src : 0;
+      t2 = is_tcp_udp ? tcp->dst : 0;
+      b = t1 + (t2 << 16);
+
+      hash_v3_mix32 (a, b, c);
+      hash_v3_finalize32 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+  else if (ip_version == 0x6)
+    {
+      u64 a, b, c;
+      u64 t1, t2;
+      ip6_header_t *ip6 = (ip6_header_t *) (ethertype_p + 1);
+      tcp_header_t *tcp = (void *) (ip6 + 1);
+
+      if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
+                       (ip6->protocol == IP_PROTOCOL_UDP)))
+       {
+         is_tcp_udp = 1;
+       }
+      else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+       {
+         ip6_hop_by_hop_header_t *hbh =
+           (ip6_hop_by_hop_header_t *) (ip6 + 1);
+         if ((hbh->protocol == IP_PROTOCOL_TCP)
+             || (hbh->protocol == IP_PROTOCOL_UDP))
+           {
+             is_tcp_udp = 1;
+             /* Extension header length is in 8-byte units, not
+              * counting the first 8 bytes. */
+             tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+           }
+       }
+      a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]);
+      b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]);
+
+      t1 = is_tcp_udp ? tcp->src : 0;
+      t2 = is_tcp_udp ? tcp->dst : 0;
+      c = (t2 << 16) | t1;
+      hash_mix64 (a, b, c);
+
+      return c % vec_len (bif->active_slaves);
+    }
+
+  /* Ethertype said IP but the version nibble is neither 4 nor 6. */
+  return (bond_load_balance_l2 (vm, node, bif, b0));
+}
+
+/*
+ * Round-robin mode: advance the bond's last-used index by one, wrap it
+ * to the number of active slaves and return it.  The index is kept in
+ * the bond itself, so it persists across packets.  The caller must make
+ * sure the bond has at least one active slave (the wrap divides by the
+ * slave count).
+ */
+static inline u32
+bond_load_balance_round_robin (vlib_main_t * vm,
+                              vlib_node_runtime_t * node,
+                              bond_if_t * bif, vlib_buffer_t * b0)
+{
+  ++bif->lb_rr_last_index;
+  bif->lb_rr_last_index =
+    bif->lb_rr_last_index % vec_len (bif->active_slaves);
+
+  return bif->lb_rr_last_index;
+}
+
+/*
+ * Active-backup mode: index 0 of the active slave vector is the active
+ * member and all others are backups, so every packet maps to index 0.
+ */
+static inline u32
+bond_load_balance_active_backup (vlib_main_t * vm,
+                                vlib_node_runtime_t * node,
+                                bond_if_t * bif, vlib_buffer_t * b0)
+{
+  return 0;
+}
+
+/* One entry per algorithm listed in foreach_bond_lb_algo; the list's fourth
+ * argument selects the matching bond_load_balance_<p> function.  bond_tx_fn
+ * indexes this table with bif->lb. */
+static bond_load_balance_func_t bond_load_balance_table[] = {
+#define _(v,f,s, p) { bond_load_balance_##p },
+  foreach_bond_lb_algo
+#undef _
+};
+
+/*
+ * TX function of the bond device class.
+ *
+ * For every buffer in the frame the load-balance function selected by
+ * bif->lb picks an index into bif->active_slaves; the buffer's TX
+ * sw_if_index is rewritten to that slave and the buffer is appended to a
+ * frame obtained for the slave interface.
+ *
+ * If the bond is admin down, or has no active slave, the whole frame is
+ * freed and counted as dropped instead.
+ *
+ * Returns the number of buffers in the incoming frame.
+ */
+static uword
+bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
+  bond_main_t *bm = &bond_main;
+  bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance);
+  u32 bi0, bi1, bi2, bi3;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from;
+  ethernet_header_t *eth;
+  u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
+  u32 port, port1, port2, port3;
+  u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+  bond_packet_trace_t *t0;
+  uword n_trace = vlib_get_trace_count (vm, node);
+  u16 thread_index = vlib_get_thread_index ();
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 *to_next, *to_next1, *to_next2, *to_next3;
+  u32 sif_if_index, sif_if_index1, sif_if_index2, sif_if_index3;
+  vlib_frame_t *f, *f1, *f2, *f3;
+
+  /* Bond is admin down: free every buffer and account them as drops. */
+  if (PREDICT_FALSE (bif->admin_up == 0))
+    {
+      vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+      vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+                                    VNET_INTERFACE_COUNTER_DROP,
+                                    thread_index, bif->sw_if_index,
+                                    frame->n_vectors);
+      vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN,
+                       frame->n_vectors);
+      return frame->n_vectors;
+    }
+
+  /* No active slave to send on: free every buffer and account drops. */
+  if (PREDICT_FALSE (vec_len (bif->active_slaves) == 0))
+    {
+      bi0 = from[0];
+      b0 = vlib_get_buffer (vm, bi0);
+      /* NOTE(review): the byte count passed here is the length of the
+       * first buffer only, not the sum over the frame -- confirm intent. */
+      vlib_increment_combined_counter
+       (vnet_main.interface_main.combined_sw_if_counters
+        + VNET_INTERFACE_COUNTER_TX, thread_index, bif->sw_if_index,
+        frame->n_vectors, b0->current_length);
+
+      vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+      vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+                                    VNET_INTERFACE_COUNTER_DROP,
+                                    thread_index, bif->sw_if_index,
+                                    frame->n_vectors);
+      vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE,
+                       frame->n_vectors);
+      return frame->n_vectors;
+    }
+
+  /* Number of buffers / pkts */
+  n_left_from = frame->n_vectors;
+
+  /* Quad loop: handles 4 buffers per pass but needs 8 remaining, because
+   * from[4..7] are prefetched for the following pass. */
+  while (n_left_from >= 8)
+    {
+      // Prefetch next iteration
+      {
+       vlib_buffer_t *p4, *p5, *p6, *p7;
+
+       p4 = vlib_get_buffer (vm, from[4]);
+       p5 = vlib_get_buffer (vm, from[5]);
+       p6 = vlib_get_buffer (vm, from[6]);
+       p7 = vlib_get_buffer (vm, from[7]);
+
+       vlib_prefetch_buffer_header (p4, STORE);
+       vlib_prefetch_buffer_header (p5, STORE);
+       vlib_prefetch_buffer_header (p6, STORE);
+       vlib_prefetch_buffer_header (p7, STORE);
+
+       CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, LOAD);
+      }
+
+      bi0 = from[0];
+      bi1 = from[1];
+      bi2 = from[2];
+      bi3 = from[3];
+
+      b0 = vlib_get_buffer (vm, bi0);
+      b1 = vlib_get_buffer (vm, bi1);
+      b2 = vlib_get_buffer (vm, bi2);
+      b3 = vlib_get_buffer (vm, bi3);
+
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+      /* Remember the TX interface the buffers arrived with (used only
+       * for tracing) before it is overwritten with the slave below. */
+      sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+      sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+      sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+      /* Ask the configured algorithm for an index into active_slaves. */
+      port =
+       (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0);
+      port1 =
+       (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b1);
+      port2 =
+       (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b2);
+      port3 =
+       (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b3);
+
+      sif_if_index = *vec_elt_at_index (bif->active_slaves, port);
+      sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1);
+      sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2);
+      sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3);
+
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index;
+      vnet_buffer (b1)->sw_if_index[VLIB_TX] = sif_if_index1;
+      vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2;
+      vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3;
+
+      /* Each buffer is handed over in its own get/append/put sequence on
+       * a frame for the chosen slave interface. */
+      f = vnet_get_frame_to_sw_interface (vnm, sif_if_index);
+      f1 = vnet_get_frame_to_sw_interface (vnm, sif_if_index1);
+      f2 = vnet_get_frame_to_sw_interface (vnm, sif_if_index2);
+      f3 = vnet_get_frame_to_sw_interface (vnm, sif_if_index3);
+
+      to_next = vlib_frame_vector_args (f);
+      to_next1 = vlib_frame_vector_args (f1);
+      to_next2 = vlib_frame_vector_args (f2);
+      to_next3 = vlib_frame_vector_args (f3);
+
+      to_next += f->n_vectors;
+      to_next1 += f1->n_vectors;
+      to_next2 += f2->n_vectors;
+      to_next3 += f3->n_vectors;
+
+      to_next[0] = vlib_get_buffer_index (vm, b0);
+      to_next1[0] = vlib_get_buffer_index (vm, b1);
+      to_next2[0] = vlib_get_buffer_index (vm, b2);
+      to_next3[0] = vlib_get_buffer_index (vm, b3);
+
+      f->n_vectors++;
+      f1->n_vectors++;
+      f2->n_vectors++;
+      f3->n_vectors++;
+
+      vnet_put_frame_to_sw_interface (vnm, sif_if_index, f);
+      vnet_put_frame_to_sw_interface (vnm, sif_if_index1, f1);
+      vnet_put_frame_to_sw_interface (vnm, sif_if_index2, f2);
+      vnet_put_frame_to_sw_interface (vnm, sif_if_index3, f3);
+
+      /* Trace as many of the four buffers as the remaining trace budget
+       * allows.  In each record sw_if_index holds the TX interface the
+       * buffer arrived with and bond_sw_if_index the slave chosen. */
+      if (PREDICT_FALSE (n_trace > 0))
+       {
+         vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
+         vlib_set_trace_count (vm, node, --n_trace);
+         t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+         eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+         t0->ethernet = *eth;
+         t0->sw_if_index = sw_if_index;
+         t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+         if (PREDICT_TRUE (n_trace > 0))
+           {
+             vlib_trace_buffer (vm, node, next1, b1, 0 /* follow_chain */ );
+             vlib_set_trace_count (vm, node, --n_trace);
+             t0 = vlib_add_trace (vm, node, b1, sizeof (*t0));
+             eth = (ethernet_header_t *) vlib_buffer_get_current (b1);
+             t0->ethernet = *eth;
+             t0->sw_if_index = sw_if_index1;
+             t0->bond_sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+
+             if (PREDICT_TRUE (n_trace > 0))
+               {
+                 vlib_trace_buffer (vm, node, next2, b2,
+                                    0 /* follow_chain */ );
+                 vlib_set_trace_count (vm, node, --n_trace);
+                 t0 = vlib_add_trace (vm, node, b2, sizeof (*t0));
+                 eth = (ethernet_header_t *) vlib_buffer_get_current (b2);
+                 t0->ethernet = *eth;
+                 t0->sw_if_index = sw_if_index2;
+                 t0->bond_sw_if_index =
+                   vnet_buffer (b2)->sw_if_index[VLIB_TX];
+
+                 if (PREDICT_TRUE (n_trace > 0))
+                   {
+                     vlib_trace_buffer (vm, node, next3, b3,
+                                        0 /* follow_chain */ );
+                     vlib_set_trace_count (vm, node, --n_trace);
+                     t0 = vlib_add_trace (vm, node, b3, sizeof (*t0));
+                     eth =
+                       (ethernet_header_t *) vlib_buffer_get_current (b3);
+                     t0->ethernet = *eth;
+                     t0->sw_if_index = sw_if_index3;
+                     t0->bond_sw_if_index =
+                       vnet_buffer (b3)->sw_if_index[VLIB_TX];
+                   }
+               }
+           }
+       }
+
+      from += 4;
+      n_left_from -= 4;
+    }
+
+  /* Single loop: same steps as above, one buffer at a time, for whatever
+   * the quad loop left over. */
+  while (n_left_from > 0)
+    {
+      // Prefetch next iteration
+      if (n_left_from > 1)
+       {
+         vlib_buffer_t *p2;
+
+         p2 = vlib_get_buffer (vm, from[1]);
+         vlib_prefetch_buffer_header (p2, STORE);
+         CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       }
+
+      bi0 = from[0];
+      b0 = vlib_get_buffer (vm, bi0);
+
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+      sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+      port =
+       (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, b0);
+      sif_if_index = *vec_elt_at_index (bif->active_slaves, port);
+      vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index;
+      f = vnet_get_frame_to_sw_interface (vnm, sif_if_index);
+      to_next = vlib_frame_vector_args (f);
+      to_next += f->n_vectors;
+
+      to_next[0] = vlib_get_buffer_index (vm, b0);
+      f->n_vectors++;
+      vnet_put_frame_to_sw_interface (vnm, sif_if_index, f);
+
+      if (PREDICT_FALSE (n_trace > 0))
+       {
+         vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
+         vlib_set_trace_count (vm, node, --n_trace);
+         t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+         eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+         t0->ethernet = *eth;
+         t0->sw_if_index = sw_if_index;
+         t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+       }
+
+      from += 1;
+      n_left_from -= 1;
+    }
+
+  /* NOTE(review): VNET_INTERFACE_COUNTER_TX is used above as an index
+   * into combined_sw_if_counters and here as an index into the simple
+   * sw_if_counters -- confirm this is the intended simple counter. */
+  vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters
+                                + VNET_INTERFACE_COUNTER_TX, thread_index,
+                                bif->sw_if_index, frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/* Device class for bond interfaces: wires the TX function, its error
+ * counters, the interface-name and TX-trace formatters, and the admin
+ * up/down and sub-interface callbacks defined in this file. */
+VNET_DEVICE_CLASS (bond_dev_class) = {
+  .name = "bond",
+  .tx_function = bond_tx_fn,
+  .tx_function_n_errors = BOND_TX_N_ERROR,
+  .tx_function_error_strings = bond_tx_error_strings,
+  .format_device_name = format_bond_interface_name,
+  .admin_up_down_function = bond_interface_admin_up_down,
+  .subif_add_del_function = bond_subif_add_del_function,
+  .format_tx_trace = format_bond_tx_trace,
+};
+
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (bond_dev_class, bond_tx_fn)
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
new file mode 100644 (file)
index 0000000..4deec82
--- /dev/null
@@ -0,0 +1,509 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vnet/llc/llc.h>
+#include <vnet/snap/snap.h>
+#include <vnet/bonding/node.h>
+
+/* Definition of the global bonding driver state object. */
+bond_main_t bond_main;
+
+/* X-macro list of bond input errors: _(enum suffix, counter description). */
+#define foreach_bond_input_error \
+  _(NONE, "no error")            \
+  _(IF_DOWN, "interface down")   \
+  _(NO_SLAVE, "no slave")        \
+  _(NO_BOND, "no bond interface")\
+  _(PASS_THRU, "pass through")
+
+/* Error codes BOND_INPUT_ERROR_<suffix>, generated from the list above;
+ * BOND_INPUT_N_ERROR is the number of entries. */
+typedef enum
+{
+#define _(f,s) BOND_INPUT_ERROR_##f,
+  foreach_bond_input_error
+#undef _
+    BOND_INPUT_N_ERROR,
+} bond_input_error_t;
+
+/* Description strings, generated from the same list and therefore in the
+ * same order as bond_input_error_t. */
+static char *bond_input_error_strings[] = {
+#define _(n,s) s,
+  foreach_bond_input_error
+#undef _
+};
+
+/*
+ * Format one bond input trace record as
+ *   "src <mac>, dst <mac>, <interface> -> <interface>"
+ *
+ * args: vlib_main_t * (unused), vlib_node_t * (unused),
+ *       bond_packet_trace_t * (the record to print).
+ * Returns the format vector s with the text appended.
+ */
+static u8 *
+format_bond_input_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);
+  vnet_hw_interface_t *hw, *hw1;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index);
+  hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index);
+
+  /*
+   * Interface names are vppinfra vectors built with format() and carry no
+   * guaranteed NUL terminator, so they are printed with %v (length-aware)
+   * rather than %s, which would read until it happens to hit a zero byte.
+   */
+  s = format (s, "src %U, dst %U, %v -> %v",
+             format_ethernet_address, t->ethernet.src_address,
+             format_ethernet_address, t->ethernet.dst_address,
+             hw->name, hw1->name);
+
+  return s;
+}
+
+/*
+ * Return non-zero when the frame is CDP: either the ethertype is
+ * ETHERNET_TYPE_CDP, or the bytes after the ethernet header form an
+ * LLC/SNAP header with source SAP 0xAA, control 0x03, OUI 00:00:0C and
+ * protocol 0x2000.
+ */
+static_always_inline u8
+packet_is_cdp (ethernet_header_t * eth)
+{
+  llc_header_t *llc;
+  snap_header_t *snap;
+
+  if (eth->type == htons (ETHERNET_TYPE_CDP))
+    return 1;
+
+  llc = (llc_header_t *) (eth + 1);
+  snap = (snap_header_t *) (llc + 1);
+
+  return (llc->src_sap == 0xAA) && (llc->control == 0x03) &&
+    (snap->protocol == htons (0x2000)) &&
+    (snap->oui[0] == 0) && (snap->oui[1] == 0) && (snap->oui[2] == 0x0C);
+}
+
+/*
+ * Re-attribute a packet received on a slave interface to its bond.
+ *
+ * When the slave belongs to a bond that has at least one slave and is
+ * admin up, the buffer's RX sw_if_index is changed to the bond's and the
+ * bond's RX counter is incremented -- unless the frame is a slow-protocol,
+ * CDP or LLDP frame, which is left on the slave interface and counted as
+ * PASS_THRU.  For tagged frames the ethertype behind up to two VLAN tags
+ * is examined.
+ *
+ * Every case in which the rewrite is not performed is recorded with the
+ * matching BOND_INPUT_ERROR_* counter: NO_SLAVE (sif is null or the bond
+ * has no slaves), NO_BOND, IF_DOWN or PASS_THRU.
+ */
+static inline void
+bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
+                         slave_if_t * sif, ethernet_header_t * eth,
+                         vlib_buffer_t * b0)
+{
+  u16 thread_index = vlib_get_thread_index ();
+  bond_if_t *bif;
+  ethernet_vlan_header_t *tag;
+  u16 inner_type;
+  int is_control;
+
+  if (PREDICT_FALSE (sif == 0))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
+      return;
+    }
+
+  bif = bond_get_master_by_sw_if_index (sif->group);
+  if (PREDICT_FALSE (bif == 0))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_BOND, 1);
+      return;
+    }
+
+  if (PREDICT_FALSE (vec_len (bif->slaves) < 1))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
+      return;
+    }
+
+  if (PREDICT_FALSE (bif->admin_up != 1))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_IF_DOWN, 1);
+      return;
+    }
+
+  /* Work out whether this is a layer-2 control frame to let through. */
+  if (!ethernet_frame_is_tagged (ntohs (eth->type)))
+    {
+      is_control = (eth->type == htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
+       || packet_is_cdp (eth)
+       || (eth->type == htons (ETHERNET_TYPE_802_1_LLDP));
+    }
+  else
+    {
+      tag = (void *) (eth + 1);
+      if (tag->type == ntohs (ETHERNET_TYPE_VLAN))
+       tag++;                  /* double tagged: use the inner tag */
+      inner_type = tag->type;
+
+      is_control = (inner_type == htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
+       || (inner_type == htons (ETHERNET_TYPE_CDP))
+       || (inner_type == htons (ETHERNET_TYPE_802_1_LLDP));
+    }
+
+  if (PREDICT_FALSE (is_control))
+    {
+      vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_PASS_THRU, 1);
+      return;
+    }
+
+  /* Change the physical interface to the bond interface. */
+  vnet_buffer (b0)->sw_if_index[VLIB_RX] = bif->sw_if_index;
+
+  /* increase rx counters */
+  vlib_increment_simple_counter
+    (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+     thread_index, bif->sw_if_index, 1);
+}
+
+static uword
+bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+              vlib_frame_t * frame)
+{
+  u32 bi0, bi1, bi2, bi3;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  u32 next_index;
+  u32 *from, *to_next, n_left_from, n_left_to_next;
+  ethernet_header_t *eth, *eth1, *eth2, *eth3;
+  u32 next0, next1, next2, next3;
+  bond_packet_trace_t *t0;
+  uword n_trace = vlib_get_trace_count (vm, node);
+  u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
+  slave_if_t *sif, *sif1, *sif2, *sif3;
+  u16 thread_index = vlib_get_thread_index ();
+
+  /* Vector of buffer / pkt indices we're supposed to process */
+  from = vlib_frame_vector_args (frame);
+
+  /* Number of buffers / pkts */
+  n_left_from = frame->n_vectors;
+
+  /* Speculatively send the first buffer to the last disposition we used */
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      /* set up to enqueue to our disposition with index = next_index */
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 12 && n_left_to_next >= 4)
+       {
+         // Prefetch next iteration
+         {
+           vlib_buffer_t *b4, *b5, *b6, *b7;
+
+           b4 = vlib_get_buffer (vm, from[4]);
+           b5 = vlib_get_buffer (vm, from[5]);
+           b6 = vlib_get_buffer (vm, from[6]);
+           b7 = vlib_get_buffer (vm, from[7]);
+
+           vlib_prefetch_buffer_header (b4, STORE);
+           vlib_prefetch_buffer_header (b5, STORE);
+           vlib_prefetch_buffer_header (b6, STORE);
+           vlib_prefetch_buffer_header (b7, STORE);
+
+           CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD);
+           CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD);
+           CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD);
+           CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         }
+
+         next0 = 0;
+         next1 = 0;
+         next2 = 0;
+         next3 = 0;
+
+         bi0 = from[0];
+         bi1 = from[1];
+         bi2 = from[2];
+         bi3 = from[3];
+
+         to_next[0] = bi0;
+         to_next[1] = bi1;
+         to_next[2] = bi2;
+         to_next[3] = bi3;
+
+         from += 4;
+         to_next += 4;
+         n_left_from -= 4;
+         n_left_to_next -= 4;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         b1 = vlib_get_buffer (vm, bi1);
+         b2 = vlib_get_buffer (vm, bi2);
+         b3 = vlib_get_buffer (vm, bi3);
+
+         vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
+                            b0);
+         vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1,
+                            b1);
+         vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2,
+                            b2);
+         vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3,
+                            b3);
+
+         eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+         eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1);
+         eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2);
+         eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3);
+
+         sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+         sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
+         sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
+
+         // sw_if_index points to the physical interface
+         sif = bond_get_slave_by_sw_if_index (sw_if_index);
+         sif1 = bond_get_slave_by_sw_if_index (sw_if_index1);
+         sif2 = bond_get_slave_by_sw_if_index (sw_if_index2);
+         sif3 = bond_get_slave_by_sw_if_index (sw_if_index3);
+
+         bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
+         bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1);
+         bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2);
+         bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3);
+
+         if (PREDICT_FALSE (n_trace > 0))
+           {
+             vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
+             vlib_set_trace_count (vm, node, --n_trace);
+             t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+             t0->ethernet = *eth;
+             t0->sw_if_index = sw_if_index;
+             t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+             if (PREDICT_TRUE (n_trace > 0))
+               {
+                 vlib_trace_buffer (vm, node, next1, b1,
+                                    0 /* follow_chain */ );
+                 vlib_set_trace_count (vm, node, --n_trace);
+                 t0 = vlib_add_trace (vm, node, b1, sizeof (*t0));
+                 t0->ethernet = *eth1;
+                 t0->sw_if_index = sw_if_index1;
+                 t0->bond_sw_if_index =
+                   vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+                 if (PREDICT_TRUE (n_trace > 0))
+                   {
+                     vlib_trace_buffer (vm, node, next1, b2,
+                                        0 /* follow_chain */ );
+                     vlib_set_trace_count (vm, node, --n_trace);
+                     t0 = vlib_add_trace (vm, node, b2, sizeof (*t0));
+                     t0->ethernet = *eth2;
+                     t0->sw_if_index = sw_if_index2;
+                     t0->bond_sw_if_index =
+                       vnet_buffer (b2)->sw_if_index[VLIB_RX];
+
+                     if (PREDICT_TRUE (n_trace > 0))
+                       {
+                         vlib_trace_buffer (vm, node, next1, b2,
+                                            0 /* follow_chain */ );
+                         vlib_set_trace_count (vm, node, --n_trace);
+                         t0 = vlib_add_trace (vm, node, b3, sizeof (*t0));
+                         t0->ethernet = *eth3;
+                         t0->sw_if_index = sw_if_index3;
+                         t0->bond_sw_if_index =
+                           vnet_buffer (b3)->sw_if_index[VLIB_RX];
+                       }
+                   }
+               }
+           }
+
+         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
+         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
+         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, bi1, bi2, bi3, next0, next1,
+                                          next2, next3);
+       }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         // Prefetch next iteration
+         if (n_left_from > 1)
+           {
+             vlib_buffer_t *p2;
+
+             p2 = vlib_get_buffer (vm, from[1]);
+             vlib_prefetch_buffer_header (p2, STORE);
+             CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+           }
+
+         next0 = 0;
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
+                            b0);
+
+         eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
+
+         sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+         // sw_if_index points to the physical interface
+         sif = bond_get_slave_by_sw_if_index (sw_if_index);
+         bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
+
+         VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
+       }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, bond_input_node.index,
+                              BOND_INPUT_ERROR_NONE, frame->n_vectors);
+
+  vnet_device_increment_rx_packets (thread_index, frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/*
+ * Init hook registered through VLIB_INIT_FUNCTION.
+ * It performs no work and always returns 0.
+ */
+static clib_error_t *
+bond_input_init (vlib_main_t * vm)
+{
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Registration of the internal graph node "bond-input", whose dispatch
+ * function is bond_input_fn.
+ * NOTE(review): n_next_nodes is 0 although next_nodes lists "error-drop";
+ * confirm whether that entry is meant to be registered.
+ */
+VLIB_REGISTER_NODE (bond_input_node) = {
+  .function = bond_input_fn,
+  .name = "bond-input",
+  .vector_size = sizeof (u32),
+  .format_buffer = format_ethernet_header_with_length,
+  .format_trace = format_bond_input_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = BOND_INPUT_N_ERROR,
+  .error_strings = bond_input_error_strings,
+  .n_next_nodes = 0,
+  .next_nodes =
+  {
+    [0] = "error-drop"
+  }
+};
+
+/* Register bond_input_init as an init function. */
+VLIB_INIT_FUNCTION (bond_input_init);
+
+/*
+ * Place the "bond-input" node on the "device-input" feature arc, ordered
+ * before "ethernet-input".
+ */
+VNET_FEATURE_INIT (bond_input, static) =
+{
+  .arc_name = "device-input",
+  .node_name = "bond-input",
+  .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn)
+/* *INDENT-ON* */
+
+/*
+ * Admin up/down callback for software interfaces.
+ *
+ * When sw_if_index belongs to a bond slave, port_enabled is updated from
+ * the admin-up bit in flags.  If lacp_enabled is 0 on that slave,
+ * collecting/distributing is then enabled (port up) or disabled (port
+ * down).  Interfaces that are not slaves are ignored.  Always returns 0.
+ */
+static clib_error_t *
+bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  vlib_main_t *vm = bond_main.vlib_main;
+  slave_if_t *slave = bond_get_slave_by_sw_if_index (sw_if_index);
+
+  /* not a bond slave: nothing to do */
+  if (slave == 0)
+    return 0;
+
+  slave->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  /* nothing more is done here when lacp_enabled is set */
+  if (slave->lacp_enabled != 0)
+    return 0;
+
+  if (slave->port_enabled)
+    bond_enable_collecting_distributing (vm, slave);
+  else
+    bond_disable_collecting_distributing (vm, slave);
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);
+
+/*
+ * Link up/down callback for hardware interfaces.
+ *
+ * When the hardware interface's software interface is a bond slave and
+ * lacp_enabled is 0 on it, collecting/distributing is enabled on link up
+ * and disabled on link down.  Always returns 0.
+ */
+static clib_error_t *
+bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  bond_main_t *bm = &bond_main;
+  slave_if_t *sif;
+  vnet_sw_interface_t *sw;
+  vlib_main_t *vm = bm->vlib_main;
+
+  /*
+   * hw_if_index is a hardware interface index, so it must not be used to
+   * index the sw_interfaces pool directly; look up the software interface
+   * that belongs to this hardware interface instead.
+   */
+  sw = vnet_get_hw_sw_interface (vnm, hw_if_index);
+  sif = bond_get_slave_by_sw_if_index (sw->sw_if_index);
+  if (sif && sif->lacp_enabled == 0)
+    {
+      if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+        bond_enable_collecting_distributing (vm, sif);
+      else
+        bond_disable_collecting_distributing (vm, sif);
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
new file mode 100644 (file)
index 0000000..74f3b1a
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vnet_bonding_node_h__
+#define __included_vnet_bonding_node_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface.h>
+
+/*
+ * LACP timer values.  Each timeout is three times the matching periodic
+ * timer value (3.0 for the short timeout, 90.0 for the long one).
+ * NOTE(review): units are presumably seconds -- confirm against the users.
+ */
+#define LACP_FAST_PERIODIC_TIMER        1.0
+#define LACP_SHORT_TIMOUT_TIME          (LACP_FAST_PERIODIC_TIMER * 3)
+#define LACP_SLOW_PERIODIC_TIMER        30.0
+#define LACP_LONG_TIMOUT_TIME           (LACP_SLOW_PERIODIC_TIMER * 3)
+
+/* Smaller of x and y; each argument may be evaluated more than once. */
+#ifndef MIN
+#define MIN(x,y) (((x)<(y))?(x):(y))
+#endif
+
+/*
+ * Bond modes as (numeric value, enum suffix, name string) triples.
+ * The list is expanded below into bond_mode_t and is reused by the
+ * format/unformat helpers in this header.
+ */
+#define foreach_bond_mode          \
+  _ (1, ROUND_ROBIN, "round-robin") \
+  _ (2, ACTIVE_BACKUP, "active-backup") \
+  _ (3, XOR, "xor") \
+  _ (4, BROADCAST, "broadcast") \
+  _ (5, LACP, "lacp")
+
+/* BOND_MODE_<suffix> enumerators, one per foreach_bond_mode entry. */
+typedef enum
+{
+#define _(v, f, s) BOND_MODE_##f = v,
+  foreach_bond_mode
+#undef _
+} bond_mode_t;
+
+/*
+ * configurable load-balances
+ * Entries are (numeric value, enum suffix, name string, identifier).
+ * This is the list unformat_bond_load_balance () accepts.
+ */
+#define foreach_bond_lb          \
+  _ (2, L23, "l23", l23)  \
+  _ (1, l34 , "l34", l34) \
+  _ (0, L2, "l2", l2)
+
+/*
+ * load-balance functions implemented in bond-output
+ * Same entry layout as foreach_bond_lb, with three further entries
+ * (round-robin, broadcast, active-backup).
+ */
+#define foreach_bond_lb_algo                    \
+  _ (0, L2, "l2", l2)                            \
+  _ (1, l34 , "l34", l34)                        \
+  _ (2, L23, "l23", l23)                         \
+  _ (3, RR, "round-robin", round_robin)          \
+  _ (4, BC, "broadcast", broadcast)              \
+  _ (5, AB, "active-backup", active_backup)
+
+/*
+ * BOND_LB_<suffix> enumerators, one per foreach_bond_lb_algo entry.
+ * Note the lower-case suffix of the l34 entry: it yields BOND_LB_l34.
+ */
+typedef enum
+{
+#define _(v, f, s, p) BOND_LB_##f = v,
+  foreach_bond_lb_algo
+#undef _
+} bond_load_balance_t;
+
+/*
+ * Argument block taken by bond_create_if ().
+ * NOTE(review): the members after the "return" marker are presumably
+ * filled in by the callee -- confirm against bond_create_if ().
+ */
+typedef struct
+{
+  u8 hw_addr_set;
+  u8 hw_addr[6];
+  u8 mode;
+  u8 lb;
+  /* return */
+  u32 sw_if_index;
+  int rv;
+  clib_error_t *error;
+} bond_create_if_args_t;
+
+/*
+ * Argument block taken by bond_enslave ().
+ * NOTE(review): the members after the "return" marker are presumably
+ * filled in by the callee -- confirm against bond_enslave ().
+ */
+typedef struct
+{
+  /* slave's sw_if_index */
+  u32 slave;
+  /* bond's sw_if_index */
+  u32 group;
+  u8 is_passive;
+  u8 is_long_timeout;
+  /* return */
+  int rv;
+  clib_error_t *error;
+} bond_enslave_args_t;
+
+/*
+ * Argument block taken by bond_detach_slave ().
+ * NOTE(review): "slave" is presumably the slave's sw_if_index, as in
+ * bond_enslave_args_t -- confirm.
+ */
+typedef struct
+{
+  u32 slave;
+  /* return */
+  int rv;
+  clib_error_t *error;
+} bond_detach_slave_args_t;
+
+/**
+ * BOND interface details struct
+ * Element type of the vector handed to bond_dump_ifs ().
+ */
+typedef struct
+{
+  u32 sw_if_index;
+  u8 interface_name[64];
+  u8 mode;
+  u8 lb;
+  u32 active_slaves;
+  u32 slaves;
+} bond_interface_details_t;
+
+/**
+ * slave interface details struct
+ * Element type of the vector handed to bond_dump_slave_ifs ().
+ */
+typedef struct
+{
+  u32 sw_if_index;
+  u8 interface_name[64];
+  u8 is_passive;
+  u8 is_long_timeout;
+  u32 active_slaves;
+} slave_interface_details_t;
+
+/*
+ * Packed record of LACP port information; used in this header for the
+ * actor and partner members of bond_if_t and slave_if_t.
+ */
+typedef CLIB_PACKED (struct
+                    {
+                    u16 system_priority;
+                    u8 system[6];
+                    u16 key; u16 port_priority; u16 port_number;
+                    u8 state;
+                    }) lacp_port_info_t;
+
+/*
+ * State kept for one bond interface; element type of the
+ * bond_main_t.interfaces pool.
+ */
+typedef struct
+{
+  u8 admin_up;
+  /* NOTE(review): presumably a bond_mode_t value -- confirm */
+  u8 mode;
+  /* NOTE(review): presumably a bond_load_balance_t value -- confirm */
+  u8 lb;
+
+  /* the last slave index for the rr lb */
+  u32 lb_rr_last_index;
+
+  u32 dev_instance;
+  u32 hw_if_index;
+  u32 sw_if_index;
+
+  /* Configured slaves */
+  u32 *slaves;
+
+  /* Slaves that are in DISTRIBUTING state */
+  u32 *active_slaves;
+
+  /* rapidly find an active slave */
+  uword *active_slave_by_sw_if_index;
+
+  lacp_port_info_t partner;
+  lacp_port_info_t actor;
+  u8 individual_aggregator;
+
+  u32 group;
+  uword *port_number_bitmap;
+  u8 use_custom_mac;
+  u8 hw_address[6];
+} bond_if_t;
+
+/*
+ * State kept for one slave interface; element type of the
+ * bond_main_t.neighbors pool.
+ */
+typedef struct
+{
+  u8 persistent_hw_address[6];
+
+  /* neighbor's vlib software interface index */
+  u32 sw_if_index;
+
+  /* Neighbor time-to-live (usually 3s) */
+  f32 ttl_in_seconds;
+
+  /*
+   * 1 = interface is configured with long timeout
+   * NOTE(review): this comment used to say 60s, but LACP_LONG_TIMOUT_TIME
+   * in this header evaluates to 90 -- confirm which value applies.
+   */
+  u8 is_long_timeout;
+
+  /* 1 = debug is on; 0 = debug is off */
+  u8 debug;
+
+  /* tx packet template id for this neighbor */
+  u8 packet_template_index;
+
+  /* Info we actually keep about each neighbor */
+
+  /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
+  u8 last_packet_signature_valid;
+  uword last_packet_signature;
+
+  /* last received lacp packet, for the J-hash optimization */
+  u8 *last_rx_pkt;
+
+  /* last marker packet */
+  u8 *last_marker_pkt;
+
+  /* neighbor vlib hw_if_index */
+  u32 hw_if_index;
+
+  /* actor does not initiate the protocol exchange */
+  u8 is_passive;
+
+  /* Partner port information */
+  lacp_port_info_t partner;
+  lacp_port_info_t partner_admin;
+
+  /* Actor port information */
+  lacp_port_info_t actor;
+  lacp_port_info_t actor_admin;
+
+  /* Need To Transmit flag */
+  u8 ntt;
+
+  /* Link has been established and Aggregate Port is operable */
+  u8 port_enabled;
+
+  /* Initialization or reinitialization of the lacp protocol entity */
+  u8 begin;
+
+  /* Aggregation Port is operating the lacp */
+  u8 lacp_enabled;
+
+  /* MUX to indicate to the Selection Logic wait_while_timer expired */
+  u8 ready_n;
+
+  /* Selection Logic indicates all Aggregation Ports attached */
+  u8 ready;
+
+  /* Selection Logic selected an Aggregator */
+  int selected;
+
+  /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
+  u8 port_moved;
+
+  /* timer used to detect whether received protocol information has expired */
+  f64 current_while_timer;
+
+  /* timer used to detect actor churn states */
+  f64 actor_churn_timer;
+
+  /* time last lacpdu was sent */
+  f64 last_lacpdu_time;
+
+  /* timer used to generate periodic transmission */
+  f64 periodic_timer;
+
+  /* timer used to detect partner churn states */
+  f64 partner_churn_timer;
+
+  /* provides hysteresis before performing an aggregation change */
+  f64 wait_while_timer;
+
+  /* Implementation variables, not in the spec */
+  int rx_state;
+  int tx_state;
+  int mux_state;
+  int ptx_state;
+
+  /* actor admin key */
+  u32 group;
+
+  u32 marker_tx_id;
+
+  u32 bif_dev_instance;
+
+  u8 loopback_port;
+
+  /* bond mode */
+  u8 mode;
+
+  clib_spinlock_t lockp;
+} slave_if_t;
+
+/*
+ * Signature of the callback stored in bond_main_t.lacp_enable_disable
+ * through bond_register_callback ().
+ */
+typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
+                                         slave_if_t * sif, u8 enable);
+
+/* Global bonding state; the single instance is bond_main. */
+typedef struct
+{
+  /* pool of bonding interfaces */
+  bond_if_t *interfaces;
+
+  /* pool of lacp neighbors */
+  slave_if_t *neighbors;
+
+  /* rapidly find a neighbor by vlib software interface index */
+  uword *neighbor_by_sw_if_index;
+
+  /* rapidly find a bond by vlib software interface index */
+  uword *bond_by_sw_if_index;
+
+  /* convenience variables */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+
+  /* lacp plugin is loaded; set to 1 by bond_register_callback () */
+  u8 lacp_plugin_loaded;
+
+  /* callback installed by bond_register_callback () */
+  lacp_enable_disable_func lacp_enable_disable;
+} bond_main_t;
+
+/*
+ * bond packet trace capture
+ * One record holds a copy of the ethernet header plus two interface
+ * indices.
+ */
+typedef struct
+{
+  ethernet_header_t ethernet;
+  u32 sw_if_index;
+  u32 bond_sw_if_index;
+} bond_packet_trace_t;
+
+/*
+ * Signature of a load-balance function: takes the bond interface and a
+ * buffer and returns a u32.
+ * NOTE(review): the return value presumably selects a slave -- confirm
+ * against the bond-output implementation.
+ */
+typedef u32 (*load_balance_func) (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node, bond_if_t * bif,
+                                 vlib_buffer_t * b0);
+
+/* Wrapper holding one load_balance_func pointer. */
+typedef struct
+{
+  load_balance_func load_balance;
+} bond_load_balance_func_t;
+
+/* Objects defined outside this header. */
+extern vlib_node_registration_t bond_input_node;
+extern vnet_device_class_t bond_dev_class;
+extern bond_main_t bond_main;
+
+/* Functions implemented outside this header. */
+void bond_disable_collecting_distributing (vlib_main_t * vm,
+                                          slave_if_t * sif);
+void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
+u8 *format_bond_interface_name (u8 * s, va_list * args);
+
+/* Results of the create/enslave/detach calls are passed through *args. */
+void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
+int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
+void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);
+void bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args);
+int bond_dump_ifs (bond_interface_details_t ** out_bondids);
+int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveids,
+                        u32 bond_sw_if_index);
+
+/*
+ * unformat helper for a bond mode name.
+ *
+ * Takes a u8 * from args.  If the input matches one of the name strings
+ * in foreach_bond_mode, the matching BOND_MODE_* value is stored through
+ * that pointer and 1 is returned; otherwise 0 is returned.
+ */
+static inline uword
+unformat_bond_mode (unformat_input_t * input, va_list * args)
+{
+  u8 *r = va_arg (*args, u8 *);
+
+  /* "if (0);" seeds the else-if chain generated by the macro below */
+  if (0);
+#define _(v, f, s) else if (unformat (input, s)) *r = BOND_MODE_##f;
+  foreach_bond_mode
+#undef _
+    else
+    return 0;
+
+  return 1;
+}
+
+/*
+ * format helper for a bond mode.
+ *
+ * Takes a u32 from args and appends the matching name string from
+ * foreach_bond_mode to s, or "unknown" when the value matches no entry.
+ */
+static inline u8 *
+format_bond_mode (u8 * s, va_list * args)
+{
+  u32 i = va_arg (*args, u32);
+  u8 *t = 0;
+
+  switch (i)
+    {
+      /* one case per foreach_bond_mode entry */
+#define _(v, f, s) case BOND_MODE_##f: t = (u8 *) s; break;
+      foreach_bond_mode
+#undef _
+    default:
+      return format (s, "unknown");
+    }
+  return format (s, "%s", t);
+}
+
+/*
+ * unformat helper for a load-balance name.
+ *
+ * Takes a u8 * from args.  If the input matches one of the name strings
+ * in foreach_bond_lb, the matching BOND_LB_* value is stored through that
+ * pointer and 1 is returned; otherwise 0 is returned.  Only the entries of
+ * foreach_bond_lb are accepted, not the longer foreach_bond_lb_algo list.
+ */
+static inline uword
+unformat_bond_load_balance (unformat_input_t * input, va_list * args)
+{
+  u8 *r = va_arg (*args, u8 *);
+
+  /* "if (0);" seeds the else-if chain generated by the macro below */
+  if (0);
+#define _(v, f, s, p) else if (unformat (input, s)) *r = BOND_LB_##f;
+  foreach_bond_lb
+#undef _
+    else
+    return 0;
+
+  return 1;
+}
+
+/*
+ * format helper for a load-balance value.
+ *
+ * Takes a u32 from args and appends the matching name string from
+ * foreach_bond_lb_algo to s, or "unknown" when the value matches no entry.
+ */
+static inline u8 *
+format_bond_load_balance (u8 * s, va_list * args)
+{
+  u32 i = va_arg (*args, u32);
+  u8 *t = 0;
+
+  switch (i)
+    {
+      /* one case per foreach_bond_lb_algo entry */
+#define _(v, f, s, p) case BOND_LB_##f: t = (u8 *) s; break;
+      foreach_bond_lb_algo
+#undef _
+    default:
+      return format (s, "unknown");
+    }
+  return format (s, "%s", t);
+}
+
+/*
+ * Record that the lacp plugin is loaded and remember the enable/disable
+ * callback it supplies in bond_main.
+ */
+static inline void
+bond_register_callback (lacp_enable_disable_func func)
+{
+  bond_main.lacp_plugin_loaded = 1;
+  bond_main.lacp_enable_disable = func;
+}
+
+/*
+ * Look up a bond interface by software interface index.
+ * Returns 0 when bond_by_sw_if_index has no entry for sw_if_index.
+ */
+static inline bond_if_t *
+bond_get_master_by_sw_if_index (u32 sw_if_index)
+{
+  uword *entry = hash_get (bond_main.bond_by_sw_if_index, sw_if_index);
+
+  if (entry == 0)
+    return 0;
+
+  /* the hash value is the index into the interfaces pool */
+  return pool_elt_at_index (bond_main.interfaces, entry[0]);
+}
+
+/* Return the element of the interfaces pool at index dev_instance. */
+static inline bond_if_t *
+bond_get_master_by_dev_instance (u32 dev_instance)
+{
+  return pool_elt_at_index (bond_main.interfaces, dev_instance);
+}
+
+/*
+ * Look up a slave interface by software interface index.
+ * Returns 0 when neighbor_by_sw_if_index has no entry for sw_if_index.
+ */
+static inline slave_if_t *
+bond_get_slave_by_sw_if_index (u32 sw_if_index)
+{
+  uword *entry = hash_get (bond_main.neighbor_by_sw_if_index, sw_if_index);
+
+  if (entry == 0)
+    return 0;
+
+  /* the hash value is the index into the neighbors pool */
+  return pool_elt_at_index (bond_main.neighbors, entry[0]);
+}
+
+#endif /* __included_vnet_bonding_node_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index fea92e7..5c7c486 100644 (file)
@@ -29,6 +29,7 @@
 #include <vlibmemory/vl_memory_api_h.h>
 #endif /* included_from_layer_3 */
 
+#include <vnet/bonding/bond.api.h>
 #include <vnet/devices/af_packet/af_packet.api.h>
 #include <vnet/devices/netmap/netmap.api.h>
 #include <vnet/devices/virtio/vhost_user.api.h>
index 28f16d2..ded6e7d 100644 (file)
@@ -47,6 +47,8 @@
 
 #include <vpp/api/vpe_msg_enum.h>
 
+#include <vnet/bonding/node.h>
+
 #define vl_typedefs            /* define message structures */
 #include <vpp/api/vpe_all_api_h.h>
 #undef vl_typedefs
@@ -609,6 +611,84 @@ static void *vl_api_sw_interface_tap_v2_dump_t_print
   FINISH;
 }
 
+/*
+ * Render a bond_create message as a "SCRIPT: bond_create ..." line.
+ * The mac-address option is emitted only for a non-zero MAC; the mode and
+ * lb options only for non-zero values.
+ */
+static void *vl_api_bond_create_t_print
+  (vl_api_bond_create_t * mp, void *handle)
+{
+  u8 *s;
+  u8 null_mac[6];
+
+  memset (null_mac, 0, sizeof (null_mac));
+
+  s = format (0, "SCRIPT: bond_create ");
+  if (memcmp (mp->mac_address, null_mac, 6))
+    s = format (s, "mac-address %U ",
+               format_ethernet_address, mp->mac_address);
+  /* every option ends with a space so that options do not run together */
+  if (mp->mode)
+    s = format (s, "mode %U ", format_bond_mode, mp->mode);
+  if (mp->lb)
+    s = format (s, "lb %U ", format_bond_load_balance, mp->lb);
+  FINISH;
+}
+
+/* Render a bond_delete message as a "SCRIPT: bond_delete ..." line. */
+static void *vl_api_bond_delete_t_print
+  (vl_api_bond_delete_t * mp, void *handle)
+{
+  u8 *s = format (0, "SCRIPT: bond_delete ");
+
+  /* the index is converted with ntohl before printing */
+  s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+  FINISH;
+}
+
+/*
+ * Render a bond_enslave message as a "SCRIPT: bond_enslave ..." line.
+ * The passive and long-timeout keywords are emitted only when the
+ * corresponding message fields are non-zero.
+ */
+static void *vl_api_bond_enslave_t_print
+  (vl_api_bond_enslave_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: bond_enslave ");
+  /* convert the indices with ntohl, as the other print functions here do */
+  s = format (s, "bond_sw_if_index %u ", ntohl (mp->bond_sw_if_index));
+  s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index));
+  if (mp->is_passive)
+    s = format (s, "passive ");
+  if (mp->is_long_timeout)
+    s = format (s, "long-timeout ");
+
+  FINISH;
+}
+
+/*
+ * Render a bond_detach_slave message as a
+ * "SCRIPT: bond_detach_slave ..." line.
+ */
+static void *vl_api_bond_detach_slave_t_print
+  (vl_api_bond_detach_slave_t * mp, void *handle)
+{
+  u8 *s = format (0, "SCRIPT: bond_detach_slave ");
+
+  /* the index is converted with ntohl before printing */
+  s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+  FINISH;
+}
+
+/*
+ * Render a sw_interface_bond_dump message as a
+ * "SCRIPT: sw_interface_bond_dump " line; no message fields are printed.
+ */
+static void *vl_api_sw_interface_bond_dump_t_print
+  (vl_api_sw_interface_bond_dump_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: sw_interface_bond_dump ");
+
+  FINISH;
+}
+
+/*
+ * Render a sw_interface_slave_dump message as a
+ * "SCRIPT: sw_interface_slave_dump ..." line.
+ */
+static void *vl_api_sw_interface_slave_dump_t_print
+  (vl_api_sw_interface_slave_dump_t * mp, void *handle)
+{
+  u8 *s = format (0, "SCRIPT: sw_interface_slave_dump ");
+
+  /* the index is converted with ntohl before printing */
+  s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index));
+  FINISH;
+}
+
 static void *vl_api_ip_add_del_route_t_print
   (vl_api_ip_add_del_route_t * mp, void *handle)
 {
@@ -3357,6 +3437,10 @@ _(TAP_CONNECT, tap_connect)                                             \
 _(TAP_MODIFY, tap_modify)                                               \
 _(TAP_DELETE, tap_delete)                                               \
 _(SW_INTERFACE_TAP_DUMP, sw_interface_tap_dump)                         \
+_(BOND_CREATE, bond_create)                                             \
+_(BOND_DELETE, bond_delete)                                             \
+_(BOND_ENSLAVE, bond_enslave)                                           \
+_(BOND_DETACH_SLAVE, bond_detach_slave)                                 \
 _(TAP_CREATE_V2, tap_create_v2)                                         \
 _(TAP_DELETE_V2, tap_delete_v2)                                         \
 _(SW_INTERFACE_TAP_V2_DUMP, sw_interface_tap_v2_dump)                   \
diff --git a/test/test_bond.py b/test/test_bond.py
new file mode 100644 (file)
index 0000000..b54a1f1
--- /dev/null
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+
+import socket
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+from scapy.packet import Raw
+from scapy.layers.l2 import Ether
+from scapy.layers.inet import IP, UDP
+from util import mactobinary
+from vpp_bond_interface import VppBondInterface
+
+
+class TestBondInterface(VppTestCase):
+    """Bond Test Case
+
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestBondInterface, cls).setUpClass()
+        # Test variables
+        cls.pkts_per_burst = 257    # Number of packets per burst
+        # create 3 pg interfaces
+        cls.create_pg_interfaces(range(4))
+
+        # packet sizes
+        cls.pg_if_packet_sizes = [64, 512, 1518]  # , 9018]
+
+        # setup all interfaces
+        for i in cls.pg_interfaces:
+            i.admin_up()
+
+    def setUp(self):
+        super(TestBondInterface, self).setUp()
+
+    def tearDown(self):
+        super(TestBondInterface, self).tearDown()
+        if not self.vpp_dead:
+            self.logger.info(self.vapi.ppcli("show interface"))
+
+    def test_bond_traffic(self):
+        """ Bond traffic test """
+
+        # topology
+        #
+        # RX->              TX->
+        #
+        # pg2 ------+        +------pg0 (slave)
+        #           |        |
+        #          BondEthernet0 (10.10.10.1)
+        #           |        |
+        # pg3 ------+        +------pg1 (slave)
+        #
+
+        # create interface (BondEthernet0)
+        #        self.logger.info("create bond")
+        bond0_mac = "02:fe:38:30:59:3c"
+        mac = mactobinary(bond0_mac)
+        bond0 = VppBondInterface(self,
+                                 mode=3,
+                                 lb=1,
+                                 use_custom_mac=1,
+                                 mac_address=mac)
+        bond0.add_vpp_config()
+        bond0.admin_up()
+        bond0_addr = socket.inet_pton(socket.AF_INET, "10.10.10.1")
+        self.vapi.sw_interface_add_del_address(bond0.sw_if_index,
+                                               bond0_addr,
+                                               24)
+
+        self.pg2.config_ip4()
+        self.pg2.resolve_arp()
+        self.pg3.config_ip4()
+        self.pg3.resolve_arp()
+
+        self.logger.info(self.vapi.cli("show interface"))
+        self.logger.info(self.vapi.cli("show interface address"))
+        self.logger.info(self.vapi.cli("show ip arp"))
+
+        # enslave pg0 and pg1 to BondEthernet0
+        self.logger.info("bond enslave interface pg0 to BondEthernet0")
+        bond0.enslave_vpp_bond_interface(sw_if_index=self.pg0.sw_if_index,
+                                         is_passive=0,
+                                         is_long_timeout=0)
+        self.logger.info("bond enslave interface pg1 to BondEthernet0")
+        bond0.enslave_vpp_bond_interface(sw_if_index=self.pg1.sw_if_index,
+                                         is_passive=0,
+                                         is_long_timeout=0)
+
+        # verify both slaves in BondEthernet0
+        if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
+        self.assertTrue(self.pg0.is_interface_config_in_dump(if_dump))
+        self.assertTrue(self.pg1.is_interface_config_in_dump(if_dump))
+
+        # generate a packet from pg2 -> BondEthernet0 -> pg1
+        # BondEthernet0 TX hashes this packet to pg1
+        p2 = (Ether(src=bond0_mac, dst=self.pg2.local_mac) /
+              IP(src=self.pg2.local_ip4, dst="10.10.10.12") /
+              UDP(sport=1235, dport=1235) /
+              Raw('\xa5' * 100))
+        self.pg2.add_stream(p2)
+
+        # generate a packet from pg3 -> BondEthernet0 -> pg0
+        # BondEthernet0 TX hashes this packet to pg0
+        # notice the ip address and ports are different than p2 packet
+        p3 = (Ether(src=bond0_mac, dst=self.pg3.local_mac) /
+              IP(src=self.pg3.local_ip4, dst="10.10.10.11") /
+              UDP(sport=1234, dport=1234) /
+              Raw('\xa5' * 100))
+        self.pg3.add_stream(p3)
+
+        self.pg_enable_capture(self.pg_interfaces)
+
+        # set up the static arp entries pointing to the BondEthernet0 interface
+        # so that it does not try to resolve the ip address
+        self.logger.info(self.vapi.cli(
+            "set ip arp static BondEthernet0 10.10.10.12 abcd.abcd.0002"))
+        self.logger.info(self.vapi.cli(
+            "set ip arp static BondEthernet0 10.10.10.11 abcd.abcd.0004"))
+
+        # clear the interface counters
+        self.logger.info(self.vapi.cli("clear interfaces"))
+
+        self.pg_start()
+
+        self.logger.info("check the interface counters")
+
+        # verify counters
+
+        # BondEthernet0 tx bytes = 284
+        intfs = self.vapi.cli("show interface BondEthernet0").split("\n")
+        found = 0
+        for intf in intfs:
+            if "tx bytes" in intf and "284" in intf:
+                found = 1
+        self.assertEqual(found, 1)
+
+        # pg0 tx bytes = 142
+        intfs = self.vapi.cli("show interface pg0").split("\n")
+        found = 0
+        for intf in intfs:
+            if "tx bytes" in intf and "142" in intf:
+                found = 1
+        self.assertEqual(found, 1)
+
+        # pg0 tx bytes = 142
+        intfs = self.vapi.cli("show interface pg1").split("\n")
+        found = 0
+        for intf in intfs:
+            if "tx bytes" in intf and "142" in intf:
+                found = 1
+        self.assertEqual(found, 1)
+
+        # pg2 rx bytes = 142
+        intfs = self.vapi.cli("show interface pg2").split("\n")
+        found = 0
+        for intf in intfs:
+            if "rx bytes" in intf and "142" in intf:
+                found = 1
+        self.assertEqual(found, 1)
+
+        # pg3 rx bytes = 142
+        intfs = self.vapi.cli("show interface pg3").split("\n")
+        found = 0
+        for intf in intfs:
+            if "rx bytes" in intf and "142" in intf:
+                found = 1
+        self.assertEqual(found, 1)
+
+        bond0.remove_vpp_config()
+
+    def test_bond_enslave(self):
+        """ Bond enslave/detach slave test """
+
+        # create interface (BondEthernet0)
+        self.logger.info("create bond")
+        bond0 = VppBondInterface(self, mode=3)
+        bond0.add_vpp_config()
+        bond0.admin_up()
+
+        # verify pg0 and pg1 not in BondEthernet0
+        if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
+        self.assertFalse(self.pg0.is_interface_config_in_dump(if_dump))
+        self.assertFalse(self.pg1.is_interface_config_in_dump(if_dump))
+
+        # enslave pg0 and pg1 to BondEthernet0
+        self.logger.info("bond enslave interface pg0 to BondEthernet0")
+        bond0.enslave_vpp_bond_interface(sw_if_index=self.pg0.sw_if_index,
+                                         is_passive=0,
+                                         is_long_timeout=0)
+
+        self.logger.info("bond enslave interface pg1 to BondEthernet0")
+        bond0.enslave_vpp_bond_interface(sw_if_index=self.pg1.sw_if_index,
+                                         is_passive=0,
+                                         is_long_timeout=0)
+
+        # verify both slaves in BondEthernet0
+        if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
+        self.assertTrue(self.pg0.is_interface_config_in_dump(if_dump))
+        self.assertTrue(self.pg1.is_interface_config_in_dump(if_dump))
+
+        # detach interface pg0
+        self.logger.info("detach interface pg0")
+        bond0.detach_vpp_bond_interface(sw_if_index=self.pg0.sw_if_index)
+
+        # verify pg0 is not in BondEthernet0, but pg1 is
+        if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
+        self.assertFalse(self.pg0.is_interface_config_in_dump(if_dump))
+        self.assertTrue(self.pg1.is_interface_config_in_dump(if_dump))
+
+        # detach interface pg1
+        self.logger.info("detach interface pg1")
+        bond0.detach_vpp_bond_interface(sw_if_index=self.pg1.sw_if_index)
+
+        # verify pg0 and pg1 not in BondEthernet0
+        if_dump = self.vapi.sw_interface_slave_dump(bond0.sw_if_index)
+        self.assertFalse(self.pg0.is_interface_config_in_dump(if_dump))
+        self.assertFalse(self.pg1.is_interface_config_in_dump(if_dump))
+
+        bond0.remove_vpp_config()
+
+    def test_bond(self):
+        """ Bond add/delete interface test """
+        self.logger.info("Bond add interfaces")
+
+        # create interface 1 (BondEthernet0)
+        bond0 = VppBondInterface(self, mode=5)
+        bond0.add_vpp_config()
+        bond0.admin_up()
+
+        # create interface 2 (BondEthernet1)
+        bond1 = VppBondInterface(self, mode=3)
+        bond1.add_vpp_config()
+        bond1.admin_up()
+
+        # verify both interfaces in the show
+        ifs = self.vapi.cli("show interface")
+        self.assertNotEqual(ifs.find('BondEthernet0'), -1)
+        self.assertNotEqual(ifs.find('BondEthernet1'), -1)
+
+        # verify they are in the dump also
+        if_dump = self.vapi.sw_interface_bond_dump()
+        self.assertTrue(bond0.is_interface_config_in_dump(if_dump))
+        self.assertTrue(bond1.is_interface_config_in_dump(if_dump))
+
+        # delete BondEthernet1
+        self.logger.info("Deleting BondEthernet1")
+        bond1.remove_vpp_config()
+
+        self.logger.info("Verifying BondEthernet1 is deleted")
+
+        ifs = self.vapi.cli("show interface")
+        # verify BondEthernet0 still in the show
+        self.assertNotEqual(ifs.find('BondEthernet0'), -1)
+
+        # verify BondEthernet1 not in the show
+        self.assertEqual(ifs.find('BondEthernet1'), -1)
+
+        # verify BondEthernet1 is not in the dump
+        if_dump = self.vapi.sw_interface_bond_dump()
+        self.assertFalse(bond1.is_interface_config_in_dump(if_dump))
+
+        # verify BondEthernet0 is still in the dump
+        self.assertTrue(bond0.is_interface_config_in_dump(if_dump))
+
+        # delete BondEthernet0
+        self.logger.info("Deleting BondEthernet0")
+        bond0.remove_vpp_config()
+
+        self.logger.info("Verifying BondEthernet0 is deleted")
+
+        # verify BondEthernet0 not in the show
+        ifs = self.vapi.cli("show interface")
+        self.assertEqual(ifs.find('BondEthernet0'), -1)
+
+        # verify BondEthernet0 is not in the dump
+        if_dump = self.vapi.sw_interface_bond_dump()
+        self.assertFalse(bond0.is_interface_config_in_dump(if_dump))
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)
diff --git a/test/vpp_bond_interface.py b/test/vpp_bond_interface.py
new file mode 100644 (file)
index 0000000..1c33e1c
--- /dev/null
@@ -0,0 +1,48 @@
+from vpp_object import VppObject
+from vpp_interface import VppInterface
+
+
+class VppBondInterface(VppInterface):
+    """Test-side handle for a VPP bond interface, driven through vapi."""
+
+    def __init__(self, test, mode, lb=0, use_custom_mac=0, mac_address=''):
+        """Store the bond parameters and run the VppInterface initializer.
+
+        :param test: stored as _test and passed to the base initializer
+        :param mode: bond mode value forwarded to bond_create
+        :param lb: load-balance value forwarded to bond_create
+        :param use_custom_mac: forwarded to bond_create as-is
+        :param mac_address: forwarded to bond_create as-is
+        """
+        self._test = test
+        self.mode, self.lb = mode, lb
+        self.use_custom_mac = use_custom_mac
+        self.mac_address = mac_address
+        self._sw_if_index = 0  # replaced by add_vpp_config()
+        super(VppBondInterface, self).__init__(test)
+
+    def add_vpp_config(self):
+        """Call bond_create and keep the sw_if_index from its reply."""
+        create = self.test.vapi.bond_create
+        reply = create(self.mode, self.lb,
+                       self.use_custom_mac, self.mac_address)
+        self._sw_if_index = reply.sw_if_index
+
+    def remove_vpp_config(self):
+        """Call bond_delete for this interface's sw_if_index."""
+        self.test.vapi.bond_delete(self.sw_if_index)
+
+    def enslave_vpp_bond_interface(self, sw_if_index, is_passive,
+                                   is_long_timeout):
+        """Call bond_enslave to attach sw_if_index to this bond."""
+        enslave = self.test.vapi.bond_enslave
+        enslave(sw_if_index, self.sw_if_index,
+                is_passive, is_long_timeout)
+
+    def detach_vpp_bond_interface(self, sw_if_index):
+        """Call bond_detach_slave for the given slave sw_if_index."""
+        self.test.vapi.bond_detach_slave(sw_if_index)
+
+    def is_interface_config_in_dump(self, dump):
+        """Return True if any entry of dump has this sw_if_index."""
+        return any(e.sw_if_index == self.sw_if_index for e in dump)
index 65cf766..5517174 100644 (file)
@@ -3341,3 +3341,78 @@ class VppPapiProvider(object):
     def want_igmp_events(self, enable=1):
         return self.api(self.papi.want_igmp_events, {'enable': enable,
                                                      'pid': os.getpid()})
+
+    def bond_create(self, mode, lb, use_custom_mac, mac_address=''):
+        """Issue the bond_create API call.
+
+        The four arguments are packed, unchanged, into the request
+        dictionary under keys of the same name.
+
+        :param mode: mode
+        :param lb: load balance
+        :param use_custom_mac: use custom mac
+        :param mac_address: mac address (default '')
+        :returns: whatever self.api returns for this call
+        """
+        request = {
+            'mode': mode,
+            'lb': lb,
+            'use_custom_mac': use_custom_mac,
+            'mac_address': mac_address,
+        }
+        return self.api(self.papi.bond_create, request)
+
+    def bond_delete(self, sw_if_index):
+        """Issue the bond_delete API call.
+
+        :param sw_if_index: interface the operation is applied to
+        :returns: whatever self.api returns for this call
+        """
+        request = {'sw_if_index': sw_if_index}
+        return self.api(self.papi.bond_delete, request)
+
+    def bond_enslave(self, sw_if_index, bond_sw_if_index, is_passive,
+                     is_long_timeout):
+        """Issue the bond_enslave API call.
+
+        :param sw_if_index: slave sw_if_index
+        :param bond_sw_if_index: bond sw_if_index
+        :param is_passive: is passive lacp speaker
+        :param is_long_timeout: 90 seconds timeout instead of 3 seconds
+            timeout
+        :returns: whatever self.api returns for this call
+        """
+        request = {
+            'sw_if_index': sw_if_index,
+            'bond_sw_if_index': bond_sw_if_index,
+            'is_passive': is_passive,
+            'is_long_timeout': is_long_timeout,
+        }
+        message = self.papi.bond_enslave
+        return self.api(message, request)
+
+    def bond_detach_slave(self, sw_if_index):
+        """Issue the bond_detach_slave API call.
+
+        :param sw_if_index: slave interface the operation is applied to
+        :returns: whatever self.api returns for this call
+        """
+        request = {'sw_if_index': sw_if_index}
+        return self.api(self.papi.bond_detach_slave, request)
+
+    def sw_interface_slave_dump(self, sw_if_index):
+        """Issue the sw_interface_slave_dump API call.
+
+        :param sw_if_index: bond sw_if_index
+        :returns: whatever self.api returns for this call
+        """
+        request = {'sw_if_index': sw_if_index}
+        return self.api(self.papi.sw_interface_slave_dump, request)
+
+    def sw_interface_bond_dump(self):
+        """Issue the sw_interface_bond_dump API call with no arguments.
+
+        :returns: whatever self.api returns for this call
+        """
+        no_args = {}
+        return self.api(self.papi.sw_interface_bond_dump, no_args)