Changing the IP table for an interface is an error if the interface already has an...
[vpp.git] / src / vnet / ip / ip6_forward.c
index b5c7955..6f77c6d 100644 (file)
 #include <vnet/ethernet/ethernet.h>    /* for ethernet_header_t */
 #include <vnet/srp/srp.h>      /* for srp_hw_interface_class */
 #include <vppinfra/cache.h>
-#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_urpf_list.h>    /* for FIB uRPF check */
 #include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
 #include <vnet/dpo/load_balance.h>
 #include <vnet/dpo/classify_dpo.h>
 
 #include <vppinfra/bihash_template.c>
 
+/* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
+#define OI_DECAP   0x80000000
+
 /**
  * @file
  * @brief IPv6 Forwarding.
@@ -404,24 +408,24 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
     }
   else
     {
-      ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
-      if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+      /* The ref count is 0 when an address is removed from an interface that has
+       * no address - this is not a ciritical error */
+      if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] ||
+         0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
        return;
     }
 
   vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index,
                               is_enable, 0, 0);
 
-  vnet_feature_enable_disable ("ip6-multicast", "ip6-lookup", sw_if_index,
-                              is_enable, 0, 0);
+  vnet_feature_enable_disable ("ip6-multicast", "ip6-mfib-forward-lookup",
+                              sw_if_index, is_enable, 0, 0);
 
 }
 
 /* get first interface address */
 ip6_address_t *
-ip6_interface_first_address (ip6_main_t * im,
-                            u32 sw_if_index,
-                            ip_interface_address_t ** result_ia)
+ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
 {
   ip_lookup_main_t *lm = &im->lookup_main;
   ip_interface_address_t *ia = 0;
@@ -436,8 +440,6 @@ ip6_interface_first_address (ip6_main_t * im,
     break;
   }));
   /* *INDENT-ON* */
-  if (result_ia)
-    *result_ia = result ? ia : 0;
   return result;
 }
 
@@ -455,6 +457,8 @@ ip6_add_del_interface_address (vlib_main_t * vm,
   ip6_address_fib_t ip6_af, *addr_fib = 0;
 
   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
   ip6_addr_fib_init (&ip6_af, address,
                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
   vec_add1 (addr_fib, ip6_af);
@@ -581,6 +585,13 @@ VNET_FEATURE_INIT (ip6_vpath, static) =
 {
   .arc_name = "ip6-unicast",
   .node_name = "vpath-input-ip6",
+  .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
+{
+  .arc_name = "ip6-unicast",
+  .node_name = "ip6-vxlan-bypass",
   .runs_before = VNET_FEATURES ("ip6-lookup"),
 };
 
@@ -609,12 +620,12 @@ VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
 VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
   .arc_name = "ip6-multicast",
   .node_name = "vpath-input-ip6",
-  .runs_before = VNET_FEATURES ("ip6-lookup"),
+  .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
 };
 
 VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
   .arc_name = "ip6-multicast",
-  .node_name = "ip6-lookup",
+  .node_name = "ip6-mfib-forward-lookup",
   .runs_before = VNET_FEATURES ("ip6-drop"),
 };
 
@@ -914,7 +925,7 @@ typedef struct
 }
 ip6_forward_next_trace_t;
 
-static u8 *
+u8 *
 format_ip6_forward_next_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
@@ -1120,22 +1131,6 @@ VLIB_REGISTER_NODE (ip6_punt_node, static) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
 
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_multicast_node, static) =
-{
-  .function = ip6_drop,
-  .name = "ip6-multicast",
-  .vector_size = sizeof (u32),
-  .format_trace = format_ip6_forward_next_trace,
-  .n_next_nodes = 1,
-  .next_nodes =
-  {
-    [0] = "error-drop",
-  },
-};
-
-/* *INDENT-ON* */
-
 /* Compute TCP/UDP/ICMP6 checksum in software. */
 u16
 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
@@ -1315,6 +1310,25 @@ ip6_locate_header (vlib_buffer_t * p0,
   return (next_proto);
 }
 
+/**
+ * @brief returns number of links on which src is reachable.
+ */
+always_inline int
+ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
+{
+  const load_balance_t *lb0;
+  index_t lbi;
+
+  lbi = ip6_fib_table_fwding_lookup_with_if_index (im,
+                                                  vnet_buffer
+                                                  (b)->sw_if_index[VLIB_RX],
+                                                  &i->src_address);
+
+  lb0 = load_balance_get (lbi);
+
+  return (fib_urpf_check_size (lb0->lb_urpf));
+}
+
 static uword
 ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
@@ -1361,6 +1375,9 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);
 
+         vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
+         vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
+
          type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
          type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
 
@@ -1462,16 +1479,14 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
              type0 != IP_BUILTIN_PROTOCOL_ICMP &&
              !ip6_address_is_link_local_unicast (&ip0->src_address))
            {
-             u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
-             error0 = (ADJ_INDEX_INVALID == src_adj_index0
+             error0 = (!ip6_urpf_loose_check (im, p0, ip0)
                        ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
            }
          if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
              type1 != IP_BUILTIN_PROTOCOL_ICMP &&
              !ip6_address_is_link_local_unicast (&ip1->src_address))
            {
-             u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
-             error1 = (ADJ_INDEX_INVALID == src_adj_index1
+             error1 = (!ip6_urpf_loose_check (im, p1, ip1)
                        ? IP6_ERROR_SRC_LOOKUP_MISS : error1);
            }
 
@@ -1508,6 +1523,8 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
 
          ip0 = vlib_buffer_get_current (p0);
 
+         vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
+
          type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
          next0 = lm->local_next_by_ip_protocol[ip0->protocol];
 
@@ -1567,8 +1584,7 @@ ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
              type0 != IP_BUILTIN_PROTOCOL_ICMP &&
              !ip6_address_is_link_local_unicast (&ip0->src_address))
            {
-             u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
-             error0 = (ADJ_INDEX_INVALID == src_adj_index0
+             error0 = (!ip6_urpf_loose_check (im, p0, ip0)
                        ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
            }
 
@@ -1970,7 +1986,7 @@ typedef enum
 always_inline uword
 ip6_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
-                   vlib_frame_t * frame, int is_midchain)
+                   vlib_frame_t * frame, int is_midchain, int is_mcast)
 {
   ip_lookup_main_t *lm = &ip6_main.lookup_main;
   u32 *from = vlib_frame_vector_args (frame);
@@ -2101,14 +2117,14 @@ ip6_rewrite_inline (vlib_main_t * vm,
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
 
-         vlib_increment_combined_counter (&adjacency_counters,
-                                          cpu_index, adj_index0,
-                                          /* packet increment */ 0,
-                                          /* byte increment */ rw_len0);
-         vlib_increment_combined_counter (&adjacency_counters,
-                                          cpu_index, adj_index1,
-                                          /* packet increment */ 0,
-                                          /* byte increment */ rw_len1);
+         vlib_increment_combined_counter
+           (&adjacency_counters,
+            cpu_index,
+            adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+         vlib_increment_combined_counter
+           (&adjacency_counters,
+            cpu_index, adj_index1,
+            1, vlib_buffer_length_in_chain (vm, p1) + rw_len1);
 
          /* Check MTU of outgoing interface. */
          error0 =
@@ -2158,6 +2174,14 @@ ip6_rewrite_inline (vlib_main_t * vm,
              adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
              adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
            }
+         if (is_mcast)
+           {
+             /*
+              * copy bytes from the IP address into the MAC rewrite
+              */
+             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0);
+             vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 0);
+           }
 
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
@@ -2226,10 +2250,10 @@ ip6_rewrite_inline (vlib_main_t * vm,
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
 
-         vlib_increment_combined_counter (&adjacency_counters,
-                                          cpu_index, adj_index0,
-                                          /* packet increment */ 0,
-                                          /* byte increment */ rw_len0);
+         vlib_increment_combined_counter
+           (&adjacency_counters,
+            cpu_index,
+            adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
          /* Check MTU of outgoing interface. */
          error0 =
@@ -2258,6 +2282,10 @@ ip6_rewrite_inline (vlib_main_t * vm,
            {
              adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
            }
+         if (is_mcast)
+           {
+             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0);
+           }
 
          p0->error = error_node->errors[error0];
 
@@ -2285,16 +2313,21 @@ static uword
 ip6_rewrite (vlib_main_t * vm,
             vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip6_rewrite_inline (vm, node, frame,
-                            /* midchain */ 0);
+  return ip6_rewrite_inline (vm, node, frame, 0, 0);
+}
+
+static uword
+ip6_rewrite_mcast (vlib_main_t * vm,
+                  vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return ip6_rewrite_inline (vm, node, frame, 0, 1);
 }
 
 static uword
 ip6_midchain (vlib_main_t * vm,
              vlib_node_runtime_t * node, vlib_frame_t * frame)
 {
-  return ip6_rewrite_inline (vm, node, frame,
-                            /* midchain */ 1);
+  return ip6_rewrite_inline (vm, node, frame, 1, 0);
 }
 
 /* *INDENT-OFF* */
@@ -2328,10 +2361,22 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
 
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
+{
+  .function = ip6_rewrite_mcast,
+  .name = "ip6-rewrite-mcast",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_rewrite_trace,
+  .sibling_of = "ip6-rewrite",
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
+
 /*
  * Hop-by-Hop handling
  */
-
 ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
 
 #define foreach_ip6_hop_by_hop_error \
@@ -2339,13 +2384,15 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
 _(FORMAT, "incorrectly formatted hop-by-hop options") \
 _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
 
+/* *INDENT-OFF* */
 typedef enum
 {
 #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
   foreach_ip6_hop_by_hop_error
 #undef _
-    IP6_HOP_BY_HOP_N_ERROR,
+  IP6_HOP_BY_HOP_N_ERROR,
 } ip6_hop_by_hop_error_t;
+/* *INDENT-ON* */
 
 /*
  * Primary h-b-h handler trace support
@@ -2871,6 +2918,7 @@ ip6_lookup_init (vlib_main_t * vm)
 
   /* Create FIB with index 0 and table id of 0. */
   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
 
   {
     pg_node_t *pn;
@@ -2921,6 +2969,7 @@ add_del_ip6_interface_table (vlib_main_t * vm,
                             vlib_cli_command_t * cmd)
 {
   vnet_main_t *vnm = vnet_get_main ();
+  ip_interface_address_t *ia;
   clib_error_t *error = 0;
   u32 sw_if_index, table_id;
 
@@ -2942,12 +2991,40 @@ add_del_ip6_interface_table (vlib_main_t * vm,
       goto done;
     }
 
+  /*
+   * If the interface already has in IP address, then a change int
+   * VRF is not allowed. The IP address applied must first be removed.
+   * We do not do that automatically here, since VPP has no knowledge
+   * of whether thoses subnets are valid in the destination VRF.
+   */
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address (&ip6_main.lookup_main,
+                                ia, sw_if_index,
+                                1 /* honor unnumbered */,
+  ({
+      ip4_address_t * a;
+
+      a = ip_interface_address_get_address (&ip6_main.lookup_main, ia);
+      error = clib_error_return (0, "interface %U has address %U",
+                                 format_vnet_sw_if_index_name, vnm,
+                                 sw_if_index,
+                                 format_ip6_address, a);
+      goto done;
+  }));
+  /* *INDENT-ON* */
+
   {
     u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
                                                       table_id);
 
     vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
     ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+
+    fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
+                                                   table_id);
+
+    vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
+    ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index;
   }
 
 
@@ -2963,13 +3040,12 @@ done:
  * an IP Address is assigned to an interface in the table (which adds a route
  * automatically).
  *
- * @note IP addresses added after setting the interface IP table end up in
- * the indicated FIB table. If the IP address is added prior to adding the
- * interface to the FIB table, it will NOT be part of the FIB table. Predictable
- * but potentially counter-intuitive results occur if you provision interface
- * addresses in multiple FIBs. Upon RX, packets will be processed in the last
- * IP table ID provisioned. It might be marginally useful to evade source RPF
- * drops to put an interface address into multiple FIBs.
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
  *
  * @cliexpar
  * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
@@ -3257,7 +3333,7 @@ vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
 
-  if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL);
+  if_addr = ip6_interface_first_address (ipm, sw_if_index);
 
   if (NULL != if_addr)
     {