fib: constify the adjacency in the rewrite nodes
[vpp.git] / src / vnet / ip / ip4_forward.c
index 060787f..a67b061 100644 (file)
@@ -736,9 +736,11 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
 
                        return
                          clib_error_create
-                         ("failed to add %U which conflicts with %U for interface %U",
+                         ("failed to add %U on %U which conflicts with %U for interface %U",
                           format_ip4_address_and_length, address,
                           address_length,
+                         format_vnet_sw_if_index_name, vnm,
+                         sw_if_index,
                           format_ip4_address_and_length, x,
                           ia->address_length,
                           format_vnet_sw_if_index_name, vnm,
@@ -1201,7 +1203,7 @@ format_ip4_rewrite_trace (u8 * s, va_list * args)
   s = format (s, "\n%U%U",
              format_white_space, indent,
              format_ip_adjacency_packet_data,
-             t->dpo_index, t->packet_data, sizeof (t->packet_data));
+             t->packet_data, sizeof (t->packet_data));
   return s;
 }
 
@@ -1301,9 +1303,6 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
 {
   ip_csum_t sum0;
   u32 ip_header_length, payload_length_host_byte_order;
-  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
-  u16 sum16;
-  void *data_this_buffer;
 
   /* Initialize checksum with ip header. */
   ip_header_length = ip4_header_bytes (ip0);
@@ -1326,34 +1325,9 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
     sum0 =
       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
 
-  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
-  data_this_buffer = (void *) ip0 + ip_header_length;
-  n_ip_bytes_this_buffer =
-    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
-  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
-    {
-      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
-       n_ip_bytes_this_buffer - ip_header_length : 0;
-    }
-  while (1)
-    {
-      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
-      n_bytes_left -= n_this_buffer;
-      if (n_bytes_left == 0)
-       break;
-
-      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
-      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
-       return 0xfefe;
-
-      p0 = vlib_get_buffer (vm, p0->next_buffer);
-      data_this_buffer = vlib_buffer_get_current (p0);
-      n_this_buffer = clib_min (p0->current_length, n_bytes_left);
-    }
-
-  sum16 = ~ip_csum_fold (sum0);
-
-  return sum16;
+  return ip_calculate_l4_checksum (vm, p0, sum0,
+                                  payload_length_host_byte_order, (u8 *) ip0,
+                                  ip_header_length, NULL);
 }
 
 u32
@@ -1857,7 +1831,7 @@ VLIB_REGISTER_NODE (ip4_local_node) =
     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
-    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
+    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
   },
 };
 /* *INDENT-ON* */
@@ -2319,7 +2293,8 @@ typedef enum
 
 always_inline void
 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
-              u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
+              u16 adj_packet_bytes, bool df, u16 * next,
+              u8 is_midchain, u32 * error)
 {
   if (packet_len > adj_packet_bytes)
     {
@@ -2336,12 +2311,39 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
        {
          /* IP fragmentation */
          ip_frag_set_vnet_buffer (b, adj_packet_bytes,
-                                  IP4_FRAG_NEXT_IP4_REWRITE, 0);
+                                  (is_midchain ?
+                                   IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
+                                   IP_FRAG_NEXT_IP_REWRITE), 0);
          *next = IP4_REWRITE_NEXT_FRAGMENT;
        }
     }
 }
 
+/* Increment TTL & fix up checksum, undoing an earlier TTL decrement; no-op
+   (flag cleared) if locally originated. Works either endian, no byte swap. */
+static_always_inline void
+ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
+{
+  i32 ttl;
+  u32 checksum;
+  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
+    {
+      b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+      return;
+    }
+
+  ttl = ip->ttl;
+  /* TTL+1 adds 0x0100 (net order) to the header sum: subtract it here */
+  checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
+  checksum -= checksum > 0xffff;	/* end-around borrow on underflow */
+
+  ip->checksum = checksum;
+  ttl += 1;
+  ip->ttl = ttl;
+
+  ASSERT (ip->checksum == ip4_header_checksum (ip));
+}
+
 /* Decrement TTL & update checksum.
    Works either endian, so no need for byte swap. */
 static_always_inline void
@@ -2409,6 +2411,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
   vlib_get_buffers (vm, from, bufs, n_left_from);
   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
 
+#if (CLIB_N_PREFETCHES >= 8)
   if (n_left_from >= 6)
     {
       int i;
@@ -2420,7 +2423,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
   b = bufs;
   while (n_left_from >= 8)
     {
-      ip_adjacency_t *adj0, *adj1;
+      const ip_adjacency_t *adj0, *adj1;
       ip4_header_t *ip0, *ip1;
       u32 rw_len0, error0, adj_index0;
       u32 rw_len1, error1, adj_index1;
@@ -2483,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
-                    next + 0, &error0);
+                    next + 0, is_midchain, &error0);
       ip4_mtu_check (b[1], ip1_len,
                     adj1[0].rewrite_header.max_l3_packet_bytes,
                     ip1->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
-                    next + 1, &error1);
+                    next + 1, is_midchain, &error1);
 
       if (is_mcast)
        {
@@ -2505,8 +2508,8 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
-         b[0]->current_data -= rw_len0;
-         b[0]->current_length += rw_len0;
+         vlib_buffer_advance (b[0], -(word) rw_len0);
+
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
 
@@ -2515,16 +2518,19 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
+         if (is_midchain)
+           calc_checksums (vm, b[0]);
        }
       else
        {
          b[0]->error = error_node->errors[error0];
+         if (error0 == IP4_ERROR_MTU_EXCEEDED)
+           ip4_ttl_inc (b[0], ip0);
        }
       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
        {
          u32 next_index = adj1[0].rewrite_header.next_index;
-         b[1]->current_data -= rw_len1;
-         b[1]->current_length += rw_len1;
+         vlib_buffer_advance (b[1], -(word) rw_len1);
 
          tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
          vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
@@ -2534,63 +2540,188 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index1, &next_index, b[1]);
          next[1] = next_index;
+         if (is_midchain)
+           calc_checksums (vm, b[1]);
        }
       else
        {
          b[1]->error = error_node->errors[error1];
+         if (error1 == IP4_ERROR_MTU_EXCEEDED)
+           ip4_ttl_inc (b[1], ip1);
        }
-      if (is_midchain)
-       {
-         calc_checksums (vm, b[0]);
-         calc_checksums (vm, b[1]);
-       }
+
       /* Guess we are only writing on simple Ethernet header. */
       vnet_rewrite_two_headers (adj0[0], adj1[0],
                                ip0, ip1, sizeof (ethernet_header_t));
 
-      /*
-       * Bump the per-adjacency counters
-       */
       if (do_counters)
        {
-         vlib_increment_combined_counter
-           (&adjacency_counters,
-            thread_index,
-            adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
-
-         vlib_increment_combined_counter
-           (&adjacency_counters,
-            thread_index,
-            adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
+         if (error0 == IP4_ERROR_NONE)
+           vlib_increment_combined_counter
+             (&adjacency_counters,
+              thread_index,
+              adj_index0, 1,
+              vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+
+         if (error1 == IP4_ERROR_NONE)
+           vlib_increment_combined_counter
+             (&adjacency_counters,
+              thread_index,
+              adj_index1, 1,
+              vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
        }
 
       if (is_midchain)
        {
-         if (adj0->sub_type.midchain.fixup_func)
+         if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
-         if (adj1->sub_type.midchain.fixup_func)
+         if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
            adj1->sub_type.midchain.fixup_func
              (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
        }
 
       if (is_mcast)
        {
-         /*
-          * copy bytes from the IP address into the MAC rewrite
-          */
-         vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
-                                     adj0->rewrite_header.dst_mcast_offset,
-                                     &ip0->dst_address.as_u32, (u8 *) ip0);
-         vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
-                                     adj1->rewrite_header.dst_mcast_offset,
-                                     &ip1->dst_address.as_u32, (u8 *) ip1);
+         /* copy bytes from the IP address into the MAC rewrite */
+         if (error0 == IP4_ERROR_NONE)
+           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                       adj0->rewrite_header.dst_mcast_offset,
+                                       &ip0->dst_address.as_u32, (u8 *) ip0);
+         if (error1 == IP4_ERROR_NONE)
+           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                       adj1->rewrite_header.dst_mcast_offset,
+                                       &ip1->dst_address.as_u32, (u8 *) ip1);
        }
 
       next += 2;
       b += 2;
       n_left_from -= 2;
     }
+#elif (CLIB_N_PREFETCHES >= 4)
+  next = nexts;
+  b = bufs;
+  while (n_left_from >= 1)
+    {
+      ip_adjacency_t *adj0;
+      ip4_header_t *ip0;
+      u32 rw_len0, error0, adj_index0;
+      u32 tx_sw_if_index0;
+      u8 *p;
+
+      /* Prefetch next iteration */
+      if (PREDICT_TRUE (n_left_from >= 4))
+       {
+         ip_adjacency_t *adj2;
+         u32 adj_index2;
+
+         vlib_prefetch_buffer_header (b[3], LOAD);
+         vlib_prefetch_buffer_data (b[2], LOAD);
+
+         /* Prefetch adj->rewrite_header */
+         adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
+         adj2 = adj_get (adj_index2);
+         p = (u8 *) adj2;
+         CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
+                        LOAD);
+       }
+
+      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+
+      /*
+       * Prefetch the per-adjacency counters
+       */
+      if (do_counters)
+       {
+         vlib_prefetch_combined_counter (&adjacency_counters,
+                                         thread_index, adj_index0);
+       }
+
+      ip0 = vlib_buffer_get_current (b[0]);
+
+      error0 = IP4_ERROR_NONE;
+
+      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
+
+      /* Rewrite packet header and update lengths. */
+      adj0 = adj_get (adj_index0);
+
+      /* Rewrite header was prefetched. */
+      rw_len0 = adj0[0].rewrite_header.data_bytes;
+      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
+
+      /* Check MTU of outgoing interface. */
+      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
+
+      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
+       ip0_len = gso_mtu_sz (b[0]);
+
+      ip4_mtu_check (b[0], ip0_len,
+                    adj0[0].rewrite_header.max_l3_packet_bytes,
+                    ip0->flags_and_fragment_offset &
+                    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+                    next + 0, is_midchain, &error0);
+
+      if (is_mcast)
+       {
+         error0 = ((adj0[0].rewrite_header.sw_if_index ==
+                    vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
+                   IP4_ERROR_SAME_INTERFACE : error0);
+       }
+
+      /* Don't adjust the buffer for ttl issue; icmp-error node wants
+       * to see the IP header */
+      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+       {
+         u32 next_index = adj0[0].rewrite_header.next_index;
+         vlib_buffer_advance (b[0], -(word) rw_len0);
+         tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+         vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+         if (PREDICT_FALSE
+             (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+           vnet_feature_arc_start (lm->output_feature_arc_index,
+                                   tx_sw_if_index0, &next_index, b[0]);
+         next[0] = next_index;
+
+         if (is_midchain)
+           calc_checksums (vm, b[0]);
+
+         /* Guess we are only writing on simple Ethernet header. */
+         vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+         /*
+          * Bump the per-adjacency counters
+          */
+         if (do_counters)
+           vlib_increment_combined_counter
+             (&adjacency_counters,
+              thread_index,
+              adj_index0, 1, vlib_buffer_length_in_chain (vm,
+                                                          b[0]) + rw_len0);
+
+         if (is_midchain && adj0->sub_type.midchain.fixup_func)
+           adj0->sub_type.midchain.fixup_func
+             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
+
+         if (is_mcast)
+           /* copy bytes from the IP address into the MAC rewrite */
+           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                       adj0->rewrite_header.dst_mcast_offset,
+                                       &ip0->dst_address.as_u32, (u8 *) ip0);
+       }
+      else
+       {
+         b[0]->error = error_node->errors[error0];
+         if (error0 == IP4_ERROR_MTU_EXCEEDED)
+           ip4_ttl_inc (b[0], ip0);
+       }
+
+      next += 1;
+      b += 1;
+      n_left_from -= 1;
+    }
+#endif
 
   while (n_left_from > 0)
     {
@@ -2627,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
-                    next + 0, &error0);
+                    next + 0, is_midchain, &error0);
 
       if (is_mcast)
        {
@@ -2641,8 +2772,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
-         b[0]->current_data -= rw_len0;
-         b[0]->current_length += rw_len0;
+         vlib_buffer_advance (b[0], -(word) rw_len0);
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
 
@@ -2651,39 +2781,36 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
-       }
-      else
-       {
-         b[0]->error = error_node->errors[error0];
-       }
-      if (is_midchain)
-       {
-         calc_checksums (vm, b[0]);
-       }
-      /* Guess we are only writing on simple Ethernet header. */
-      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
 
-      if (do_counters)
-       vlib_increment_combined_counter
-         (&adjacency_counters,
-          thread_index, adj_index0, 1,
-          vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+         if (is_midchain)
+           /* this acts on the packet that is about to be encapped */
+           calc_checksums (vm, b[0]);
 
-      if (is_midchain)
-       {
-         if (adj0->sub_type.midchain.fixup_func)
+         /* Guess we are only writing on simple Ethernet header. */
+         vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+         if (do_counters)
+           vlib_increment_combined_counter
+             (&adjacency_counters,
+              thread_index, adj_index0, 1,
+              vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+
+         if (is_midchain && adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
-       }
 
-      if (is_mcast)
+         if (is_mcast)
+           /* copy bytes from the IP address into the MAC rewrite */
+           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+                                       adj0->rewrite_header.dst_mcast_offset,
+                                       &ip0->dst_address.as_u32, (u8 *) ip0);
+       }
+      else
        {
-         /*
-          * copy bytes from the IP address into the MAC rewrite
-          */
-         vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
-                                     adj0->rewrite_header.dst_mcast_offset,
-                                     &ip0->dst_address.as_u32, (u8 *) ip0);
+         b[0]->error = error_node->errors[error0];
+         /* undo the TTL decrement - we'll be back to do it again */
+         if (error0 == IP4_ERROR_MTU_EXCEEDED)
+           ip4_ttl_inc (b[0], ip0);
        }
 
       next += 1;
@@ -2841,8 +2968,8 @@ VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
 VLIB_REGISTER_NODE (ip4_midchain_node) = {
   .name = "ip4-midchain",
   .vector_size = sizeof (u32),
-  .format_trace = format_ip4_forward_next_trace,
-  .sibling_of =  "ip4-rewrite",
+  .format_trace = format_ip4_rewrite_trace,
+  .sibling_of = "ip4-rewrite",
 };
 /* *INDENT-ON */