VPP-1283: IPv4 PMTU missing MTU value in ICMP4 message.
[vpp.git] / src / vnet / ip / ip4_forward.c
index ae45106..40fe9dc 100644 (file)
@@ -545,7 +545,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
   vec_add1 (addr_fib, ip4_af);
 
-  /* FIXME-LATER
+  /*
    * there is no support for adj-fib handling in the presence of overlapping
    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
    * most routers do.
@@ -554,31 +554,44 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
   if (!is_del)
     {
       /* When adding an address check that it does not conflict
-         with an existing address. */
+         with an existing address on any interface in this table. */
       ip_interface_address_t *ia;
-      foreach_ip_interface_address
-        (&im->lookup_main, ia, sw_if_index,
-         0 /* honor unnumbered */ ,
-         ({
-           ip4_address_t * x =
-             ip_interface_address_get_address
-             (&im->lookup_main, ia);
-           if (ip4_destination_matches_route
-               (im, address, x, ia->address_length) ||
-               ip4_destination_matches_route (im,
-                                              x,
-                                              address,
-                                              address_length))
-             return
-               clib_error_create
-               ("failed to add %U which conflicts with %U for interface %U",
-                format_ip4_address_and_length, address,
-                address_length,
-                format_ip4_address_and_length, x,
-                ia->address_length,
-                format_vnet_sw_if_index_name, vnm,
-                sw_if_index);
-         }));
+      vnet_sw_interface_t *sif;
+
+      pool_foreach(sif, vnm->interface_main.sw_interfaces,
+      ({
+          if (im->fib_index_by_sw_if_index[sw_if_index] ==
+              im->fib_index_by_sw_if_index[sif->sw_if_index])
+            {
+              foreach_ip_interface_address
+                (&im->lookup_main, ia, sif->sw_if_index,
+                 0 /* honor unnumbered */ ,
+                 ({
+                   ip4_address_t * x =
+                     ip_interface_address_get_address
+                     (&im->lookup_main, ia);
+                   if (ip4_destination_matches_route
+                       (im, address, x, ia->address_length) ||
+                       ip4_destination_matches_route (im,
+                                                      x,
+                                                      address,
+                                                      address_length))
+                     {
+                       vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
+
+                       return
+                         clib_error_create
+                         ("failed to add %U which conflicts with %U for interface %U",
+                          format_ip4_address_and_length, address,
+                          address_length,
+                          format_ip4_address_and_length, x,
+                          ia->address_length,
+                          format_vnet_sw_if_index_name, vnm,
+                          sif->sw_if_index);
+                     }
+                 }));
+            }
+      }));
     }
   /* *INDENT-ON* */
 
@@ -793,8 +806,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
       ip4_address_t *address;
       vlib_main_t *vm = vlib_get_main ();
 
+      vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
       /* *INDENT-OFF* */
-      foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
+      foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
       ({
         address = ip_interface_address_get_address (lm4, ia);
         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
@@ -1216,14 +1230,8 @@ ip4_local_inline (vlib_main_t * vm,
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
 
-         /* Treat IP frag packets as "experimental" protocol for now
-            until support of IP frag reassembly is implemented */
-         proto0 =
-           ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip0->protocol;
-         proto1 =
-           ip4_is_fragment (ip1) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip1->protocol;
+         proto0 = ip0->protocol;
+         proto1 = ip1->protocol;
 
          if (head_of_feature_arc == 0)
            goto skip_checks;
@@ -1386,11 +1394,7 @@ ip4_local_inline (vlib_main_t * vm,
          vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
-         /* Treat IP frag packets as "experimental" protocol for now
-            until support of IP frag reassembly is implemented */
-         proto0 =
-           ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
-           ip0->protocol;
+         proto0 = ip0->protocol;
 
          if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
            goto skip_check;
@@ -1481,7 +1485,6 @@ VLIB_REGISTER_NODE (ip4_local_node) =
     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
-    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
   },
 };
 /* *INDENT-ON* */
@@ -1923,7 +1926,8 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
     {
       adj_unlock (ai);
-      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
+      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
+                                 VNET_LINK_IP4, sw_if_index, &nh);
       adj = adj_get (ai);
     }
 
@@ -1949,6 +1953,39 @@ typedef enum
   IP4_REWRITE_NEXT_ICMP_ERROR,
 } ip4_rewrite_next_t;
 
+/**
+ * This bits of an IPv4 address to mask to construct a multicast
+ * MAC address
+ */
+#if CLIB_ARCH_IS_BIG_ENDIAN
+#define IP4_MCAST_ADDR_MASK 0x007fffff
+#else
+#define IP4_MCAST_ADDR_MASK 0xffff7f00
+#endif
+
+always_inline void
+ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
+              u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
+{
+  if (packet_len > adj_packet_bytes)
+    {
+      *error = IP4_ERROR_MTU_EXCEEDED;
+      if (df)
+       {
+         icmp4_error_set_vnet_buffer
+           (b, ICMP4_destination_unreachable,
+            ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+            adj_packet_bytes);
+         *next = IP4_REWRITE_NEXT_ICMP_ERROR;
+       }
+      else
+       {
+         /* Add support for fragmentation here */
+         *next = IP4_REWRITE_NEXT_DROP;
+       }
+    }
+}
+
 always_inline uword
 ip4_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
@@ -2109,16 +2146,16 @@ ip4_rewrite_inline (vlib_main_t * vm,
          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
 
          /* Check MTU of outgoing interface. */
-         error0 =
-           (vlib_buffer_length_in_chain (vm, p0) >
-            adj0[0].
-            rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
-            error0);
-         error1 =
-           (vlib_buffer_length_in_chain (vm, p1) >
-            adj1[0].
-            rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
-            error1);
+         ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
+                        adj0[0].rewrite_header.max_l3_packet_bytes,
+                        ip0->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next0, &error0);
+         ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
+                        adj1[0].rewrite_header.max_l3_packet_bytes,
+                        ip1->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next1, &error1);
 
          if (is_mcast)
            {
@@ -2196,8 +2233,16 @@ ip4_rewrite_inline (vlib_main_t * vm,
              /*
               * copy bytes from the IP address into the MAC rewrite
               */
-             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
-             vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                         adj0->
+                                         rewrite_header.dst_mcast_offset,
+                                         &ip0->dst_address.as_u32,
+                                         (u8 *) ip0);
+             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                         adj0->
+                                         rewrite_header.dst_mcast_offset,
+                                         &ip1->dst_address.as_u32,
+                                         (u8 *) ip1);
            }
 
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -2276,7 +2321,11 @@ ip4_rewrite_inline (vlib_main_t * vm,
              /*
               * copy bytes from the IP address into the MAC rewrite
               */
-             vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                         adj0->
+                                         rewrite_header.dst_mcast_offset,
+                                         &ip0->dst_address.as_u32,
+                                         (u8 *) ip0);
            }
 
          /* Update packet buffer attributes/set output interface. */
@@ -2290,9 +2339,12 @@ ip4_rewrite_inline (vlib_main_t * vm,
               vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
          /* Check MTU of outgoing interface. */
-         error0 = (vlib_buffer_length_in_chain (vm, p0)
-                   > adj0[0].rewrite_header.max_l3_packet_bytes
-                   ? IP4_ERROR_MTU_EXCEEDED : error0);
+         ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
+                        adj0[0].rewrite_header.max_l3_packet_bytes,
+                        ip0->flags_and_fragment_offset &
+                        clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                        &next0, &error0);
+
          if (is_mcast)
            {
              error0 = ((adj0[0].rewrite_header.sw_if_index ==