MTU: Setting of MTU on software interface (instead of hardware interface)
[vpp.git] / src / vnet / ip / ip4_forward.c
index b9875d7..3dce590 100644 (file)
@@ -793,8 +793,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
       ip4_address_t *address;
       vlib_main_t *vm = vlib_get_main ();
 
+      vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
       /* *INDENT-OFF* */
-      foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
+      foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
       ({
         address = ip_interface_address_get_address (lm4, ia);
         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
@@ -1216,14 +1217,8 @@ ip4_local_inline (vlib_main_t * vm,
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
 
-         /* Treat IP frag packets as "experimental" protocol for now
-            until support of IP frag reassembly is implemented */
-         proto0 =
-           ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip0->protocol;
-         proto1 =
-           ip4_is_fragment (ip1) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip1->protocol;
+         proto0 = ip0->protocol;
+         proto1 = ip1->protocol;
 
          if (head_of_feature_arc == 0)
            goto skip_checks;
@@ -1386,11 +1381,7 @@ ip4_local_inline (vlib_main_t * vm,
          vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
-         /* Treat IP frag packets as "experimental" protocol for now
-            until support of IP frag reassembly is implemented */
-         proto0 =
-           ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip0->protocol;
+         proto0 = ip0->protocol;
 
          if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
            goto skip_check;
@@ -1481,7 +1472,6 @@ VLIB_REGISTER_NODE (ip4_local_node) =
     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
-    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
   },
 };
 /* *INDENT-ON* */
@@ -1923,7 +1913,8 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
     {
       adj_unlock (ai);
-      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
+      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
+                                 VNET_LINK_IP4, sw_if_index, &nh);
       adj = adj_get (ai);
     }
 
@@ -1949,6 +1940,29 @@ typedef enum
   IP4_REWRITE_NEXT_ICMP_ERROR,
 } ip4_rewrite_next_t;
 
+always_inline void  /* MTU check helper: on an oversized packet, sets *error and redirects *next */
+ip4_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes,  /* b: buffer that receives the ICMP error info; buffer_packet_bytes: packet length to test */
+              u16 adj_packet_bytes, bool df, u32 * next, u32 * error)  /* adj_packet_bytes: size limit; df: true = must not fragment; next/error: outputs, written only when the limit is exceeded */
+{
+  if (buffer_packet_bytes > adj_packet_bytes)  /* at or below the limit: *next and *error are left untouched */
+    {
+      *error = IP4_ERROR_MTU_EXCEEDED;  /* recorded for both the ICMP and the drop outcome */
+      if (df)  /* callers in this file pass the masked DF bit of flags_and_fragment_offset */
+       {
+         icmp4_error_set_vnet_buffer  /* stash destination-unreachable / fragmentation-needed-and-DF-set in the buffer */
+           (b, ICMP4_destination_unreachable,
+            ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+            adj_packet_bytes);  /* NOTE(review): presumably the MTU value reported in the ICMP error - confirm against icmp4_error_set_vnet_buffer */
+         *next = IP4_REWRITE_NEXT_ICMP_ERROR;  /* select the ICMP-error next index */
+       }
+      else
+       {
+         /* Fragmentation is not implemented here yet: an oversized packet without DF is dropped. */
+         *next = IP4_REWRITE_NEXT_DROP;
+       }
+    }
+}
+
 always_inline uword
 ip4_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
@@ -2109,26 +2123,20 @@ ip4_rewrite_inline (vlib_main_t * vm,
          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
 
          /* Check MTU of outgoing interface. */
-         if (vlib_buffer_length_in_chain (vm, p0) >
-             adj0[0].rewrite_header.max_l3_packet_bytes)
-           {
-             error0 = IP4_ERROR_MTU_EXCEEDED;
-             next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
-             icmp4_error_set_vnet_buffer
-               (p0, ICMP4_destination_unreachable,
-                ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
-                0);
-           }
-         if (vlib_buffer_length_in_chain (vm, p1) >
-             adj1[0].rewrite_header.max_l3_packet_bytes)
-           {
-             error1 = IP4_ERROR_MTU_EXCEEDED;
-             next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
-             icmp4_error_set_vnet_buffer
-               (p1, ICMP4_destination_unreachable,
-                ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
-                0);
-           }
+         ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+                        adj0[0].rewrite_header.max_l3_packet_bytes,
+                        ip0->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next0, &error0);
+         ip4_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1),
+                        adj1[0].rewrite_header.max_l3_packet_bytes,
+                        ip1->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next1, &error1);
+
+         /* Guess we are only writing on simple Ethernet header. */
+         vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                   ip0, ip1, sizeof (ethernet_header_t));
 
          if (is_mcast)
            {
@@ -2152,10 +2160,17 @@ ip4_rewrite_inline (vlib_main_t * vm,
              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
 
+             if (is_midchain)
+               {
+                 adj0->sub_type.midchain.fixup_func
+                   (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+               }
+
              if (PREDICT_FALSE
                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start (lm->output_feature_arc_index,
                                        tx_sw_if_index0, &next0, p0);
+
            }
          if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
            {
@@ -2166,16 +2181,18 @@ ip4_rewrite_inline (vlib_main_t * vm,
              tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
              vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
 
+             if (is_midchain)
+               {
+                 adj1->sub_type.midchain.fixup_func
+                   (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
+               }
+
              if (PREDICT_FALSE
                  (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
                vnet_feature_arc_start (lm->output_feature_arc_index,
                                        tx_sw_if_index1, &next1, p1);
            }
 
-         /* Guess we are only writing on simple Ethernet header. */
-         vnet_rewrite_two_headers (adj0[0], adj1[0],
-                                   ip0, ip1, sizeof (ethernet_header_t));
-
          /*
           * Bump the per-adjacency counters
           */
@@ -2194,13 +2211,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
                 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
            }
 
-         if (is_midchain)
-           {
-             adj0->sub_type.midchain.fixup_func
-               (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
-             adj1->sub_type.midchain.fixup_func
-               (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
-           }
          if (is_mcast)
            {
              /*
@@ -2281,6 +2291,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
          /* Guess we are only writing on simple Ethernet header. */
          vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
          if (is_mcast)
            {
              /*
@@ -2300,16 +2311,12 @@ ip4_rewrite_inline (vlib_main_t * vm,
               vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
          /* Check MTU of outgoing interface. */
-         if (vlib_buffer_length_in_chain (vm, p0) >
-             adj0[0].rewrite_header.max_l3_packet_bytes)
-           {
-             error0 = IP4_ERROR_MTU_EXCEEDED;
-             next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
-             icmp4_error_set_vnet_buffer
-               (p0, ICMP4_destination_unreachable,
-                ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
-                0);
-           }
+         ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+                        adj0[0].rewrite_header.max_l3_packet_bytes,
+                        ip0->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next0, &error0);
+
          if (is_mcast)
            {
              error0 = ((adj0[0].rewrite_header.sw_if_index ==