MTU: Setting of MTU on software interface (instead of hardware interface)
[vpp.git] / src / vnet / ip / ip6_forward.c
index c1c9ec0..7599733 100644 (file)
 #include <vnet/dpo/classify_dpo.h>
 
 #include <vppinfra/bihash_template.c>
+#include <vnet/ip/ip6_forward.h>
 
 /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
 #define OI_DECAP   0x80000000
 
-/**
- * @file
- * @brief IPv6 Forwarding.
- *
- * This file contains the source code for IPv6 forwarding.
- */
-
-
-always_inline uword
-ip6_lookup_inline (vlib_main_t * vm,
-                  vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
-  ip6_main_t *im = &ip6_main;
-  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
-  u32 n_left_from, n_left_to_next, *from, *to_next;
-  ip_lookup_next_t next;
-  u32 thread_index = vlib_get_thread_index ();
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
-
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-       {
-         vlib_buffer_t *p0, *p1;
-         u32 pi0, pi1, lbi0, lbi1, wrong_next;
-         ip_lookup_next_t next0, next1;
-         ip6_header_t *ip0, *ip1;
-         ip6_address_t *dst_addr0, *dst_addr1;
-         u32 fib_index0, fib_index1;
-         u32 flow_hash_config0, flow_hash_config1;
-         const dpo_id_t *dpo0, *dpo1;
-         const load_balance_t *lb0, *lb1;
-
-         /* Prefetch next iteration. */
-         {
-           vlib_buffer_t *p2, *p3;
-
-           p2 = vlib_get_buffer (vm, from[2]);
-           p3 = vlib_get_buffer (vm, from[3]);
-
-           vlib_prefetch_buffer_header (p2, LOAD);
-           vlib_prefetch_buffer_header (p3, LOAD);
-           CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
-           CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
-         }
-
-         pi0 = to_next[0] = from[0];
-         pi1 = to_next[1] = from[1];
-
-         p0 = vlib_get_buffer (vm, pi0);
-         p1 = vlib_get_buffer (vm, pi1);
-
-         ip0 = vlib_buffer_get_current (p0);
-         ip1 = vlib_buffer_get_current (p1);
-
-         dst_addr0 = &ip0->dst_address;
-         dst_addr1 = &ip1->dst_address;
-
-         fib_index0 =
-           vec_elt (im->fib_index_by_sw_if_index,
-                    vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-         fib_index1 =
-           vec_elt (im->fib_index_by_sw_if_index,
-                    vnet_buffer (p1)->sw_if_index[VLIB_RX]);
-
-         fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-           fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
-         fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
-           fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
-
-         lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
-         lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
-
-         lb0 = load_balance_get (lbi0);
-         lb1 = load_balance_get (lbi1);
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (lb1->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
-         ASSERT (is_pow2 (lb1->lb_n_buckets));
-
-         vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
-
-         if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
-           {
-             flow_hash_config0 = lb0->lb_hash_config;
-             vnet_buffer (p0)->ip.flow_hash =
-               ip6_compute_flow_hash (ip0, flow_hash_config0);
-             dpo0 =
-               load_balance_get_fwd_bucket (lb0,
-                                            (vnet_buffer (p0)->ip.flow_hash &
-                                             (lb0->lb_n_buckets_minus_1)));
-           }
-         else
-           {
-             dpo0 = load_balance_get_bucket_i (lb0, 0);
-           }
-         if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
-           {
-             flow_hash_config1 = lb1->lb_hash_config;
-             vnet_buffer (p1)->ip.flow_hash =
-               ip6_compute_flow_hash (ip1, flow_hash_config1);
-             dpo1 =
-               load_balance_get_fwd_bucket (lb1,
-                                            (vnet_buffer (p1)->ip.flow_hash &
-                                             (lb1->lb_n_buckets_minus_1)));
-           }
-         else
-           {
-             dpo1 = load_balance_get_bucket_i (lb1, 0);
-           }
-         next0 = dpo0->dpoi_next_node;
-         next1 = dpo1->dpoi_next_node;
-
-         /* Only process the HBH Option Header if explicitly configured to do so */
-         if (PREDICT_FALSE
-             (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
-           {
-             next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
-               (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
-           }
-         if (PREDICT_FALSE
-             (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
-           {
-             next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
-               (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
-           }
-         vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-         vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
-         vlib_increment_combined_counter
-           (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
-         vlib_increment_combined_counter
-           (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
-
-         from += 2;
-         to_next += 2;
-         n_left_to_next -= 2;
-         n_left_from -= 2;
-
-         wrong_next = (next0 != next) + 2 * (next1 != next);
-         if (PREDICT_FALSE (wrong_next != 0))
-           {
-             switch (wrong_next)
-               {
-               case 1:
-                 /* A B A */
-                 to_next[-2] = pi1;
-                 to_next -= 1;
-                 n_left_to_next += 1;
-                 vlib_set_next_frame_buffer (vm, node, next0, pi0);
-                 break;
-
-               case 2:
-                 /* A A B */
-                 to_next -= 1;
-                 n_left_to_next += 1;
-                 vlib_set_next_frame_buffer (vm, node, next1, pi1);
-                 break;
-
-               case 3:
-                 /* A B C */
-                 to_next -= 2;
-                 n_left_to_next += 2;
-                 vlib_set_next_frame_buffer (vm, node, next0, pi0);
-                 vlib_set_next_frame_buffer (vm, node, next1, pi1);
-                 if (next0 == next1)
-                   {
-                     /* A B B */
-                     vlib_put_next_frame (vm, node, next, n_left_to_next);
-                     next = next1;
-                     vlib_get_next_frame (vm, node, next, to_next,
-                                          n_left_to_next);
-                   }
-               }
-           }
-       }
-
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         vlib_buffer_t *p0;
-         ip6_header_t *ip0;
-         u32 pi0, lbi0;
-         ip_lookup_next_t next0;
-         load_balance_t *lb0;
-         ip6_address_t *dst_addr0;
-         u32 fib_index0, flow_hash_config0;
-         const dpo_id_t *dpo0;
-
-         pi0 = from[0];
-         to_next[0] = pi0;
-
-         p0 = vlib_get_buffer (vm, pi0);
-
-         ip0 = vlib_buffer_get_current (p0);
-
-         dst_addr0 = &ip0->dst_address;
-
-         fib_index0 =
-           vec_elt (im->fib_index_by_sw_if_index,
-                    vnet_buffer (p0)->sw_if_index[VLIB_RX]);
-         fib_index0 =
-           (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
-            (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
-
-         lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
-
-         lb0 = load_balance_get (lbi0);
-         flow_hash_config0 = lb0->lb_hash_config;
-
-         vnet_buffer (p0)->ip.flow_hash = 0;
-         ASSERT (lb0->lb_n_buckets > 0);
-         ASSERT (is_pow2 (lb0->lb_n_buckets));
-
-         if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
-           {
-             flow_hash_config0 = lb0->lb_hash_config;
-             vnet_buffer (p0)->ip.flow_hash =
-               ip6_compute_flow_hash (ip0, flow_hash_config0);
-             dpo0 =
-               load_balance_get_fwd_bucket (lb0,
-                                            (vnet_buffer (p0)->ip.flow_hash &
-                                             (lb0->lb_n_buckets_minus_1)));
-           }
-         else
-           {
-             dpo0 = load_balance_get_bucket_i (lb0, 0);
-           }
-
-         dpo0 = load_balance_get_bucket_i (lb0,
-                                           (vnet_buffer (p0)->ip.flow_hash &
-                                            lb0->lb_n_buckets_minus_1));
-         next0 = dpo0->dpoi_next_node;
-
-         /* Only process the HBH Option Header if explicitly configured to do so */
-         if (PREDICT_FALSE
-             (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
-           {
-             next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
-               (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
-           }
-         vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
-         vlib_increment_combined_counter
-           (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
-
-         from += 1;
-         to_next += 1;
-         n_left_to_next -= 1;
-         n_left_from -= 1;
-
-         if (PREDICT_FALSE (next0 != next))
-           {
-             n_left_to_next += 1;
-             vlib_put_next_frame (vm, node, next, n_left_to_next);
-             next = next0;
-             vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
-             to_next[0] = pi0;
-             to_next += 1;
-             n_left_to_next -= 1;
-           }
-       }
-
-      vlib_put_next_frame (vm, node, next, n_left_to_next);
-    }
-
-  if (node->flags & VLIB_NODE_FLAG_TRACE)
-    ip6_forward_next_trace (vm, node, frame, VLIB_TX);
-
-  return frame->n_vectors;
-}
-
 static void
 ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
                          ip6_main_t * im, u32 fib_index,
@@ -670,6 +395,12 @@ VNET_FEATURE_ARC_INIT (ip6_output, static) =
   .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
 };
 
+VNET_FEATURE_INIT (ip6_outacl, static) = {
+  .arc_name = "ip6-output",
+  .node_name = "ip6-outacl",
+  .runs_before = VNET_FEATURES ("ipsec-output-ip6"),
+};
+
 VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
   .arc_name = "ip6-output",
   .node_name = "ipsec-output-ip6",
@@ -701,8 +432,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
       vlib_main_t *vm = vlib_get_main ();
 
       ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
+      vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
       /* *INDENT-OFF* */
-      foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* honor unnumbered */,
+      foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
       ({
         address = ip_interface_address_get_address (lm6, ia);
         ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
@@ -1243,12 +975,15 @@ ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
 {
   const load_balance_t *lb0;
   index_t lbi;
+  u32 fib_index;
 
-  lbi = ip6_fib_table_fwding_lookup_with_if_index (im,
-                                                  vnet_buffer
-                                                  (b)->sw_if_index[VLIB_RX],
-                                                  &i->src_address);
+  fib_index = vec_elt (im->fib_index_by_sw_if_index,
+                      vnet_buffer (b)->sw_if_index[VLIB_RX]);
+  fib_index =
+    (vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+    fib_index : vnet_buffer (b)->sw_if_index[VLIB_TX];
 
+  lbi = ip6_fib_table_fwding_lookup (im, fib_index, &i->src_address);
   lb0 = load_balance_get (lbi);
 
   return (fib_urpf_check_size (lb0->lb_urpf));
@@ -1578,6 +1313,7 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          next0 = lm->local_next_by_ip_protocol[ip0->protocol];
          next0 =
            error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
          p0->error = error_node->errors[error0];
 
          if (head_of_feature_arc)
@@ -1815,9 +1551,10 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
             * Choose source address based on destination lookup
             * adjacency.
             */
-           if (ip6_src_address_for_packet (lm,
-                                           sw_if_index0,
-                                           &h0->ip.src_address))
+           if (!ip6_src_address_for_packet (lm,
+                                            sw_if_index0,
+                                            &ip0->dst_address,
+                                            &h0->ip.src_address))
              {
                /* There is no address on the interface */
                p0->error =
@@ -2011,7 +1748,8 @@ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
     {
       adj_unlock (ai);
-      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, sw_if_index, &nh);
+      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6,
+                                 VNET_LINK_IP6, sw_if_index, &nh);
       adj = adj_get (ai);
     }
 
@@ -2036,6 +1774,19 @@ typedef enum
   IP6_REWRITE_NEXT_ICMP_ERROR,
 } ip6_rewrite_next_t;
 
+always_inline void
+ip6_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes,
+              u16 adj_packet_bytes, u32 * next, u32 * error)
+{
+  if (adj_packet_bytes >= 1280 && buffer_packet_bytes > adj_packet_bytes)
+    {
+      *error = IP6_ERROR_MTU_EXCEEDED;
+      icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
+                                  adj_packet_bytes);
+      *next = IP6_REWRITE_NEXT_ICMP_ERROR;
+    }
+}
+
 always_inline uword
 ip6_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
@@ -2160,9 +1911,14 @@ ip6_rewrite_inline (vlib_main_t * vm,
            {
              p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
            }
+
          adj0 = adj_get (adj_index0);
          adj1 = adj_get (adj_index1);
 
+         /* Guess we are only writing on a simple Ethernet header. */
+         vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                   ip0, ip1, sizeof (ethernet_header_t));
+
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          rw_len1 = adj1[0].rewrite_header.data_bytes;
          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
@@ -2181,16 +1937,12 @@ ip6_rewrite_inline (vlib_main_t * vm,
            }
 
          /* Check MTU of outgoing interface. */
-         error0 =
-           (vlib_buffer_length_in_chain (vm, p0) >
-            adj0[0].
-            rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
-            error0);
-         error1 =
-           (vlib_buffer_length_in_chain (vm, p1) >
-            adj1[0].
-            rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
-            error1);
+         ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+                        adj0[0].rewrite_header.max_l3_packet_bytes, &next0,
+                        &error0);
+         ip6_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1),
+                        adj1[0].rewrite_header.max_l3_packet_bytes, &next1,
+                        &error1);
 
          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP headerr */
@@ -2207,6 +1959,19 @@ ip6_rewrite_inline (vlib_main_t * vm,
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start (lm->output_feature_arc_index,
                                        tx_sw_if_index0, &next0, p0);
+
+             if (is_midchain)
+               {
+                 adj0->sub_type.midchain.fixup_func
+                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+               }
+             if (is_mcast)
+               {
+                 /*
+                  * copy bytes from the IP address into the MAC rewrite
+                  */
+                 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+               }
            }
          if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
            {
@@ -2221,26 +1986,19 @@ ip6_rewrite_inline (vlib_main_t * vm,
                  (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start (lm->output_feature_arc_index,
                                        tx_sw_if_index1, &next1, p1);
-           }
-
-         /* Guess we are only writing on simple Ethernet header. */
-         vnet_rewrite_two_headers (adj0[0], adj1[0],
-                                   ip0, ip1, sizeof (ethernet_header_t));
 
-         if (is_midchain)
-           {
-             adj0->sub_type.midchain.fixup_func
-               (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
-             adj1->sub_type.midchain.fixup_func
-               (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
-           }
-         if (is_mcast)
-           {
-             /*
-              * copy bytes from the IP address into the MAC rewrite
-              */
-             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
-             vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+             if (is_midchain)
+               {
+                 adj1->sub_type.midchain.fixup_func
+                   (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
+               }
+             if (is_mcast)
+               {
+                 /*
+                  * copy bytes from the IP address into the MAC rewrite
+                  */
+                 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+               }
            }
 
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -2316,11 +2074,9 @@ ip6_rewrite_inline (vlib_main_t * vm,
            }
 
          /* Check MTU of outgoing interface. */
-         error0 =
-           (vlib_buffer_length_in_chain (vm, p0) >
-            adj0[0].
-            rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
-            error0);
+         ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+                        adj0[0].rewrite_header.max_l3_packet_bytes, &next0,
+                        &error0);
 
          /* Don't adjust the buffer for hop count issue; icmp-error node
           * wants to see the IP headerr */
@@ -2338,16 +2094,16 @@ ip6_rewrite_inline (vlib_main_t * vm,
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start (lm->output_feature_arc_index,
                                        tx_sw_if_index0, &next0, p0);
-           }
 
-         if (is_midchain)
-           {
-             adj0->sub_type.midchain.fixup_func
-               (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
-           }
-         if (is_mcast)
-           {
-             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+             if (is_midchain)
+               {
+                 adj0->sub_type.midchain.fixup_func
+                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+               }
+             if (is_mcast)
+               {
+                 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+               }
            }
 
          p0->error = error_node->errors[error0];
@@ -2961,7 +2717,7 @@ ip6_hbh_register_option (u8 option,
   ip6_main_t *im = &ip6_main;
   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
 
-  ASSERT (option < ARRAY_LEN (hm->options));
+  ASSERT ((u32) option < ARRAY_LEN (hm->options));
 
   /* Already registered */
   if (hm->options[option])
@@ -2982,7 +2738,7 @@ ip6_hbh_unregister_option (u8 option)
   ip6_main_t *im = &ip6_main;
   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
 
-  ASSERT (option < ARRAY_LEN (hm->options));
+  ASSERT ((u32) option < ARRAY_LEN (hm->options));
 
   /* Not registered */
   if (!hm->options[option])