Locally generated packets go through lookup/load-balance - locally-generated is an...
authorNeale Ranns <[email protected]>
Tue, 29 Nov 2016 14:51:37 +0000 (06:51 -0800)
committerDamjan Marion <[email protected]>
Mon, 5 Dec 2016 09:20:26 +0000 (09:20 +0000)
Add a flag to the vnet_buffer to indicate a packet is locally originated. Then in the rewrite nodes we can check this flag and not perform the TTL decrement. The switch path cost is expected to be fractions of a clock - the flags will be hot in the cache.
The checks are necessary due to the requirement that VPP must be able to emit an IP packet with TTL=255.

Change-Id: Ieb9cf06e34df54fd5c950293de8b665016295c51
Signed-off-by: Neale Ranns <[email protected]>
12 files changed:
vnet/vnet/adj/adj_nbr.c
vnet/vnet/buffer.h
vnet/vnet/fib/fib_types.c
vnet/vnet/ip/icmp4.c
vnet/vnet/ip/ip4_forward.c
vnet/vnet/ip/ip4_source_and_port_range_check.c
vnet/vnet/ip/ip6_forward.c
vnet/vnet/ip/ip6_neighbor.c
vnet/vnet/ip/lookup.h
vnet/vnet/ip/ping.c
vnet/vnet/ip/ping.h
vnet/vnet/map/ip6_map.c

index 003e18e..95d1254 100644 (file)
@@ -979,7 +979,7 @@ const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
  */
 const static char* const nbr_ip4_nodes[] =
 {
-    "ip4-rewrite-transit",
+    "ip4-rewrite",
     NULL,
 };
 const static char* const nbr_ip6_nodes[] =
index b3c71c1..6da6993 100644 (file)
@@ -67,8 +67,8 @@
 #define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6)
 #define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID)
 
-#define LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID LOG2_VLIB_BUFFER_FLAG_USER(7)
-#define VNET_BUFFER_RTE_MBUF_IS_VALID (1 << LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID)
+#define LOG2_VNET_BUFFER_LOCALLY_ORIGINATED LOG2_VLIB_BUFFER_FLAG_USER(7)
+#define VNET_BUFFER_LOCALLY_ORIGINATED (1 << LOG2_VNET_BUFFER_LOCALLY_ORIGINATED)
 
 #define foreach_buffer_opaque_union_subtype     \
 _(ethernet)                                     \
index d25a773..b66e719 100644 (file)
@@ -194,9 +194,17 @@ fib_route_path_cmp (const fib_route_path_t *rpath1,
 
     if (0 != res) return (res);
 
-    res = vnet_sw_interface_compare(vnet_get_main(),
-                                   rpath1->frp_sw_if_index,
-                                   rpath2->frp_sw_if_index);
+    if (~0 != rpath1->frp_sw_if_index &&
+        ~0 != rpath2->frp_sw_if_index)
+    {
+        res = vnet_sw_interface_compare(vnet_get_main(),
+                                        rpath1->frp_sw_if_index,
+                                        rpath2->frp_sw_if_index);
+    }
+    else
+    {
+        res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index;
+    }
 
     if (0 != res) return (res);
 
index c160f88..b1834ac 100644 (file)
@@ -328,6 +328,9 @@ ip4_icmp_echo_request (vlib_main_t * vm,
 
          ASSERT (ip0->checksum == ip4_header_checksum (ip0));
          ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+
+          p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+          p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
        }
   
       while (n_left_from > 0 && n_left_to_next > 0)
@@ -380,6 +383,8 @@ ip4_icmp_echo_request (vlib_main_t * vm,
          ip0->checksum = ip_csum_fold (sum0);
 
          ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+          p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
        }
   
       vlib_put_next_frame (vm, node, next, n_left_to_next);
@@ -402,7 +407,7 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
 
   .n_next_nodes = 1,
   .next_nodes = {
-    [0] = "ip4-rewrite-local",
+    [0] = "ip4-load-balance",
   },
 };
 
index 2a6791e..fc7b349 100644 (file)
@@ -1025,7 +1025,7 @@ VNET_FEATURE_INIT (ip4_mc_drop, static) = {
 VNET_FEATURE_ARC_INIT (ip4_output, static) =
 {
   .arc_name  = "ip4-output",
-  .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"),
+  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
 };
 
@@ -1749,8 +1749,7 @@ ip4_local (vlib_main_t * vm,
          dpo0 = load_balance_get_bucket_i(lb0, 0);
 
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
-             vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
-                 dpo0->dpoi_index;
+             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
 
           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     dpo0->dpoi_type == DPO_RECEIVE) ?
@@ -2186,7 +2185,6 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
 
 typedef enum {
   IP4_REWRITE_NEXT_DROP,
-  IP4_REWRITE_NEXT_ARP,
   IP4_REWRITE_NEXT_ICMP_ERROR,
 } ip4_rewrite_next_t;
 
@@ -2194,14 +2192,12 @@ always_inline uword
 ip4_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
                    vlib_frame_t * frame,
-                   int rewrite_for_locally_received_packets,
                    int is_midchain)
 {
   ip_lookup_main_t * lm = &ip4_main.lookup_main;
   u32 * from = vlib_frame_vector_args (frame);
   u32 n_left_from, n_left_to_next, * to_next, next_index;
   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
-  vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -2218,12 +2214,8 @@ ip4_rewrite_inline (vlib_main_t * vm,
          ip4_header_t * ip0, * ip1;
          u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
          u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
-          u32 next0_override, next1_override;
           u32 tx_sw_if_index0, tx_sw_if_index1;
 
-          if (rewrite_for_locally_received_packets)
-              next0_override = next1_override = 0;
-
          /* Prefetch next iteration. */
          {
             vlib_buffer_t * p2, * p3;
@@ -2249,8 +2241,8 @@ ip4_rewrite_inline (vlib_main_t * vm,
          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);
 
-         adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
-         adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+         adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+         adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
 
           /* We should never rewrite a pkt using the MISS adjacency */
           ASSERT(adj_index0 && adj_index1);
@@ -2263,28 +2255,19 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
          /* Decrement TTL & update checksum.
             Works either endian, so no need for byte swap. */
-         if (! rewrite_for_locally_received_packets)
+         if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
            {
-             i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
+                i32 ttl0 = ip0->ttl;
 
              /* Input node should have reject packets with ttl 0. */
              ASSERT (ip0->ttl > 0);
-             ASSERT (ip1->ttl > 0);
 
              checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
-             checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
-
              checksum0 += checksum0 >= 0xffff;
-             checksum1 += checksum1 >= 0xffff;
 
              ip0->checksum = checksum0;
-             ip1->checksum = checksum1;
-
              ttl0 -= 1;
-             ttl1 -= 1;
-
              ip0->ttl = ttl0;
-             ip1->ttl = ttl1;
 
               /*
                * If the ttl drops below 1 when forwarding, generate
@@ -2298,6 +2281,32 @@ ip4_rewrite_inline (vlib_main_t * vm,
                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
                 }
+
+             /* Verify checksum. */
+             ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+           }
+          else
+            {
+              p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+            }
+         if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+           {
+             i32 ttl1 = ip1->ttl;
+
+             /* Input node should have reject packets with ttl 0. */
+             ASSERT (ip1->ttl > 0);
+
+             checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+             checksum1 += checksum1 >= 0xffff;
+
+             ip1->checksum = checksum1;
+             ttl1 -= 1;
+             ip1->ttl = ttl1;
+
+              /*
+               * If the ttl drops below 1 when forwarding, generate
+               * an ICMP response.
+               */
               if (PREDICT_FALSE(ttl1 <= 0))
                 {
                   error1 = IP4_ERROR_TIME_EXPIRED;
@@ -2311,21 +2320,15 @@ ip4_rewrite_inline (vlib_main_t * vm,
              ASSERT (ip0->checksum == ip4_header_checksum (ip0));
              ASSERT (ip1->checksum == ip4_header_checksum (ip1));
            }
+          else
+            {
+              p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+            }
 
          /* Rewrite packet header and updates lengths. */
          adj0 = ip_get_adjacency (lm, adj_index0);
          adj1 = ip_get_adjacency (lm, adj_index1);
 
-          if (rewrite_for_locally_received_packets)
-            {
-              if (PREDICT_FALSE(adj0->lookup_next_index
-                                == IP_LOOKUP_NEXT_ARP))
-                next0_override = IP4_REWRITE_NEXT_ARP;
-              if (PREDICT_FALSE(adj1->lookup_next_index
-                                == IP_LOOKUP_NEXT_ARP))
-                next1_override = IP4_REWRITE_NEXT_ARP;
-            }
-
           /* Worth pipelining. No guarantee that adj0,1 are hot... */
          rw_len0 = adj0[0].rewrite_header.data_bytes;
          rw_len1 = adj1[0].rewrite_header.data_bytes;
@@ -2343,15 +2346,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
           next0 = (error0 == IP4_ERROR_NONE)
             ? adj0[0].rewrite_header.next_index : next0;
 
-          if (rewrite_for_locally_received_packets)
-              next0 = next0 && next0_override ? next0_override : next0;
-
           next1 = (error1 == IP4_ERROR_NONE)
             ? adj1[0].rewrite_header.next_index : next1;
 
-          if (rewrite_for_locally_received_packets)
-              next1 = next1 && next1_override ? next1_override : next1;
-
           /*
            * We've already accounted for an ethernet_header_t elsewhere
            */
@@ -2417,17 +2414,13 @@ ip4_rewrite_inline (vlib_main_t * vm,
          vlib_buffer_t * p0;
          ip4_header_t * ip0;
          u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
-          u32 next0_override;
           u32 tx_sw_if_index0;
 
-          if (rewrite_for_locally_received_packets)
-              next0_override = 0;
-
          pi0 = to_next[0] = from[0];
 
          p0 = vlib_get_buffer (vm, pi0);
 
-         adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+         adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
           /* We should never rewrite a pkt using the MISS adjacency */
           ASSERT(adj_index0);
@@ -2440,7 +2433,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
 
          /* Decrement TTL & update checksum. */
-         if (! rewrite_for_locally_received_packets)
+         if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
            {
              i32 ttl0 = ip0->ttl;
 
@@ -2471,16 +2464,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
                 }
            }
-
-          if (rewrite_for_locally_received_packets)
+          else
             {
-              /*
-               * We have to override the next_index in ARP adjacencies,
-               * because they're set up for ip4-arp, not this node...
-               */
-              if (PREDICT_FALSE(adj0->lookup_next_index
-                                == IP_LOOKUP_NEXT_ARP))
-                next0_override = IP4_REWRITE_NEXT_ARP;
+              p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
             }
 
          /* Guess we are only writing on simple Ethernet header. */
@@ -2527,9 +2513,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
             }
 
-          if (rewrite_for_locally_received_packets)
-              next0 = next0 && next0_override ? next0_override : next0;
-
          from += 1;
          n_left_from -= 1;
          to_next += 1;
@@ -2545,14 +2528,14 @@ ip4_rewrite_inline (vlib_main_t * vm,
 
   /* Need to do trace after rewrites to pick up new packet data. */
   if (node->flags & VLIB_NODE_FLAG_TRACE)
-    ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
 
   return frame->n_vectors;
 }
 
 
-/** @brief IPv4 transit rewrite node.
-    @node ip4-rewrite-transit
+/** @brief IPv4 rewrite node.
+    @node ip4-rewrite
 
     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
     header checksum, fetch the ip adjacency, check the outbound mtu,
@@ -2583,54 +2566,11 @@ ip4_rewrite_inline (vlib_main_t * vm,
       or @c error-drop
 */
 static uword
-ip4_rewrite_transit (vlib_main_t * vm,
-                    vlib_node_runtime_t * node,
-                    vlib_frame_t * frame)
+ip4_rewrite (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * frame)
 {
-  return ip4_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 0, 0);
-}
-
-/** @brief IPv4 local rewrite node.
-    @node ip4-rewrite-local
-
-    This is the IPv4 local rewrite node. Fetch the ip adjacency, check
-    the outbound interface mtu, apply the adjacency rewrite, and send
-    pkts to the adjacency rewrite header's rewrite_next_index. Deal
-    with hemorrhoids of the form "some clown sends an icmp4 w/ src =
-    dst = interface addr."
-
-    @param vm vlib_main_t corresponding to the current thread
-    @param node vlib_node_runtime_t
-    @param frame vlib_frame_t whose contents should be dispatched
-
-    @par Graph mechanics: buffer metadata, next index usage
-
-    @em Uses:
-    - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
-        - the rewrite adjacency index
-    - <code>adj->lookup_next_index</code>
-        - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
-          the packet will be dropped.
-    - <code>adj->rewrite_header</code>
-        - Rewrite string length, rewrite string, next_index
-
-    @em Sets:
-    - <code>b->current_data, b->current_length</code>
-        - Updated net of applying the rewrite string
-
-    <em>Next Indices:</em>
-    - <code> adj->rewrite_header.next_index </code>
-      or @c error-drop
-*/
-
-static uword
-ip4_rewrite_local (vlib_main_t * vm,
-                  vlib_node_runtime_t * node,
-                  vlib_frame_t * frame)
-{
-  return ip4_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 1, 0);
+  return ip4_rewrite_inline (vm, node, frame, 0);
 }
 
 static uword
@@ -2638,26 +2578,25 @@ ip4_midchain (vlib_main_t * vm,
              vlib_node_runtime_t * node,
              vlib_frame_t * frame)
 {
-  return ip4_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 0, 1);
+  return ip4_rewrite_inline (vm, node, frame, 1);
 }
 
+
 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
-  .function = ip4_rewrite_transit,
-  .name = "ip4-rewrite-transit",
+  .function = ip4_rewrite,
+  .name = "ip4-rewrite",
   .vector_size = sizeof (u32),
 
   .format_trace = format_ip4_rewrite_trace,
 
-  .n_next_nodes = 3,
+  .n_next_nodes = 2,
   .next_nodes = {
     [IP4_REWRITE_NEXT_DROP] = "error-drop",
-    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
 
 VLIB_REGISTER_NODE (ip4_midchain_node) = {
   .function = ip4_midchain,
@@ -2666,25 +2605,11 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
 
   .format_trace = format_ip4_forward_next_trace,
 
-  .sibling_of = "ip4-rewrite-transit",
+  .sibling_of = "ip4-rewrite",
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
 
-VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
-  .function = ip4_rewrite_local,
-  .name = "ip4-rewrite-local",
-  .vector_size = sizeof (u32),
-
-  .sibling_of = "ip4-rewrite-transit",
-
-  .format_trace = format_ip4_rewrite_trace,
-
-  .n_next_nodes = 0,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
-
 static clib_error_t *
 add_del_interface_table (vlib_main_t * vm,
                         unformat_input_t * input,
index 28dabeb..ae836a1 100644 (file)
@@ -784,8 +784,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
  * Example of graph node after range checking is enabled:
  * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
  *            Name                      Next                    Previous
- * ip4-source-and-port-range-      error-drop [0]           ip4-rewrite-local
- *                              interface-output [1]       ip4-rewrite-transit
+ * ip4-source-and-port-range-      error-drop [0]              ip4-rewrite
+ *                              interface-output [1]
  * @cliexend
  *
  * Example of how to display the features enabed on an interface:
index 1f40c42..a4ce65a 100644 (file)
@@ -1831,14 +1831,12 @@ always_inline uword
 ip6_rewrite_inline (vlib_main_t * vm,
                    vlib_node_runtime_t * node,
                    vlib_frame_t * frame,
-                   int rewrite_for_locally_received_packets,
                    int is_midchain)
 {
   ip_lookup_main_t * lm = &ip6_main.lookup_main;
   u32 * from = vlib_frame_vector_args (frame);
   u32 n_left_from, n_left_to_next, * to_next, next_index;
   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
-  vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -1885,8 +1883,8 @@ ip6_rewrite_inline (vlib_main_t * vm,
          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);
 
-         adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
-         adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+         adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+         adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
 
           /* We should never rewrite a pkt using the MISS adjacency */
           ASSERT(adj_index0 && adj_index1);
@@ -1897,19 +1895,16 @@ ip6_rewrite_inline (vlib_main_t * vm,
          error0 = error1 = IP6_ERROR_NONE;
           next0 = next1 = IP6_REWRITE_NEXT_DROP;
 
-         if (! rewrite_for_locally_received_packets)
+         if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
            {
-             i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
+                i32 hop_limit0 = ip0->hop_limit;
 
              /* Input node should have reject packets with hop limit 0. */
              ASSERT (ip0->hop_limit > 0);
-             ASSERT (ip1->hop_limit > 0);
 
              hop_limit0 -= 1;
-             hop_limit1 -= 1;
 
              ip0->hop_limit = hop_limit0;
-             ip1->hop_limit = hop_limit1;
 
               /*
                * If the hop count drops below 1 when forwarding, generate
@@ -1923,6 +1918,26 @@ ip6_rewrite_inline (vlib_main_t * vm,
                   icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
                 }
+           }
+          else
+            {
+              p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+            }
+         if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+          {
+             i32 hop_limit1 = ip1->hop_limit;
+
+             /* Input node should have reject packets with hop limit 0. */
+             ASSERT (ip1->hop_limit > 0);
+
+             hop_limit1 -= 1;
+
+             ip1->hop_limit = hop_limit1;
+
+              /*
+               * If the hop count drops below 1 when forwarding, generate
+               * an ICMP response.
+               */
               if (PREDICT_FALSE(hop_limit1 <= 0))
                 {
                   error1 = IP6_ERROR_TIME_EXPIRED;
@@ -1931,8 +1946,11 @@ ip6_rewrite_inline (vlib_main_t * vm,
                   icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded,
                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
                 }
-           }
-
+          }
+          else
+            {
+              p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+            }
          adj0 = ip_get_adjacency (lm, adj_index0);
          adj1 = ip_get_adjacency (lm, adj_index1);
 
@@ -2018,7 +2036,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
 
          p0 = vlib_get_buffer (vm, pi0);
 
-         adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+         adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 
           /* We should never rewrite a pkt using the MISS adjacency */
           ASSERT(adj_index0);
@@ -2031,7 +2049,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
           next0 = IP6_REWRITE_NEXT_DROP;
 
          /* Check hop limit */
-         if (! rewrite_for_locally_received_packets)
+         if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
            {
              i32 hop_limit0 = ip0->hop_limit;
 
@@ -2054,6 +2072,10 @@ ip6_rewrite_inline (vlib_main_t * vm,
                         ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
                 }
            }
+          else
+            {
+              p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+            }
 
          /* Guess we are only writing on simple Ethernet header. */
          vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2111,28 +2133,17 @@ ip6_rewrite_inline (vlib_main_t * vm,
 
   /* Need to do trace after rewrites to pick up new packet data. */
   if (node->flags & VLIB_NODE_FLAG_TRACE)
-    ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
+    ip6_forward_next_trace (vm, node, frame, VLIB_TX);
 
   return frame->n_vectors;
 }
 
 static uword
-ip6_rewrite_transit (vlib_main_t * vm,
-                    vlib_node_runtime_t * node,
-                    vlib_frame_t * frame)
+ip6_rewrite (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * frame)
 {
   return ip6_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 0,
-                            /* midchain */ 0);
-}
-
-static uword
-ip6_rewrite_local (vlib_main_t * vm,
-                  vlib_node_runtime_t * node,
-                  vlib_frame_t * frame)
-{
-  return ip6_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 1,
                             /* midchain */ 0);
 }
 
@@ -2142,7 +2153,6 @@ ip6_midchain (vlib_main_t * vm,
              vlib_frame_t * frame)
 {
   return ip6_rewrite_inline (vm, node, frame,
-                            /* rewrite_for_locally_received_packets */ 0,
                             /* midchain */ 1);
 }
 
@@ -2159,7 +2169,7 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = {
 VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
 
 VLIB_REGISTER_NODE (ip6_rewrite_node) = {
-  .function = ip6_rewrite_transit,
+  .function = ip6_rewrite,
   .name = "ip6-rewrite",
   .vector_size = sizeof (u32),
 
@@ -2172,21 +2182,7 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = {
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit);
-
-VLIB_REGISTER_NODE (ip6_rewrite_local_node) = {
-  .function = ip6_rewrite_local,
-  .name = "ip6-rewrite-local",
-  .vector_size = sizeof (u32),
-
-  .sibling_of = "ip6-rewrite",
-
-  .format_trace = format_ip6_rewrite_trace,
-
-  .n_next_nodes = 0,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
 
 /*
  * Hop-by-Hop handling
index a407978..5380950 100644 (file)
@@ -1469,9 +1469,10 @@ icmp6_router_solicitation(vlib_main_t * vm,
                                    : error0);
                               next0 = is_dropped ? 
                                   next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
-                              vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0;
+                              vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
                            }
                         }
+                      p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
                      
                      radv_info->n_solicitations_dropped  += is_dropped;
                      radv_info->n_solicitations_rcvd  += is_solicitation;
@@ -2130,15 +2131,16 @@ ip6_neighbor_send_mldpv2_report(u32 sw_if_index)
 
   /* 
    * OK to override w/ no regard for actual FIB, because
-   * ip6-rewrite-local only looks at the adjacency.
+   * ip6-rewrite only looks at the adjacency.
    */
   vnet_buffer (b0)->sw_if_index[VLIB_RX] = 
     vnet_main.local_interface_sw_if_index;
   
-  vnet_buffer (b0)->ip.adj_index[VLIB_RX]  = 
+  vnet_buffer (b0)->ip.adj_index[VLIB_TX]  = 
     radv_info->all_mldv2_routers_adj_index;
+  b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
 
-  vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-local");
+  vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
   
   f = vlib_get_frame_to_node (vm, node->index);
   to_next = vlib_frame_vector_args (f);
@@ -2160,7 +2162,7 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = {
   .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
   .next_nodes = {
     [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
-    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-local",
+    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite",
     [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
   },
 };
index 7f9b984..a609e2f 100644 (file)
@@ -111,7 +111,7 @@ typedef enum {
     [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",                      \
     [IP_LOOKUP_NEXT_ARP] = "ip4-arp",                          \
     [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean",                      \
-    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",          \
+    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite",                  \
     [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain",                        \
     [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance",                \
     [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error",            \
index 0bf83e9..08e7701 100644 (file)
@@ -256,81 +256,65 @@ init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo,
   return data_len;
 }
 
-/*
- * Given adj index, return sw_if_index, possibly overwritten
- * by a parameter. There is mostly debug outputs here,
- * but it turned out handy to have these.
- */
-
-static u32
-adj_index_to_sw_if_index (vlib_main_t * vm, ip_lookup_main_t * lm,
-                          char *lookup_next_nodes[], u32 adj_index0,
-                          u32 sw_if_index, u8 verbose)
-{
-  ip_adjacency_t *adj0 = ip_get_adjacency (lm, adj_index0);
-  u32 sw_if_index0 = adj0->rewrite_header.sw_if_index;
-  if (verbose)
-    {
-      vlib_cli_output (vm, "Adjacency index: %u, sw_if_index: %u\n",
-                       adj_index0, sw_if_index0);
-      vlib_cli_output (vm, "Adj: %s\n",
-                       lookup_next_nodes[adj0->lookup_next_index]);
-      vlib_cli_output (vm, "Adj Interface: %d\n", adj0->if_address_index);
-    }
-
-  if (~0 != sw_if_index)
-    {
-      sw_if_index0 = sw_if_index;
-      if (verbose)
-        {
-          vlib_cli_output (vm, "Forced set interface: %d\n", sw_if_index0);
-        }
-    }
-  return sw_if_index0;
-}
-
 static send_ip46_ping_result_t
-send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
+send_ip6_ping (vlib_main_t * vm, ip6_main_t * im,
+               u32 table_id, ip6_address_t * pa6,
                u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
                u8 verbose)
 {
   icmp6_echo_request_header_t *h0;
   u32 bi0 = 0;
-  u32 sw_if_index0;
-  ip_lookup_main_t *lm = &im->lookup_main;
   int bogus_length = 0;
-  u32 adj_index0;
   vlib_buffer_t *p0;
   vlib_frame_t *f;
   u32 *to_next;
-  u32 fib_index0;
 
   if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
     return SEND_PING_ALLOC_FAIL;
 
   p0 = vlib_get_buffer (vm, bi0);
 
-  /* Determine sw_if_index0 of source intf, may be force-set via sw_if_index. */
-  vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
-  vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;  /* use interface VRF */
-  fib_index0 = 0;
-  adj_index0 = fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128));
+  /*
+   * if the user did not provide a source interface, use the any interface
+   * that the destination resolves via.
+   */
+  if (~0 == sw_if_index)
+    {
+      fib_node_index_t fib_entry_index;
+      u32 fib_index;
+
+      fib_index = ip6_fib_index_from_table_id(table_id);
 
-  if (ADJ_INDEX_INVALID == adj_index0)
+      if (~0 == fib_index)
+      {
+          vlib_buffer_free (vm, &bi0, 1);
+          return SEND_PING_NO_TABLE;
+      }
+
+      fib_entry_index = ip6_fib_table_lookup(fib_index, pa6, 128);
+      sw_if_index = fib_entry_get_resolving_interface(fib_entry_index);
+      /*
+       * Set the TX interface to force ip-lookup to use its table ID
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+    }
+  else
     {
-      vlib_buffer_free (vm, &bi0, 1);
-      return SEND_PING_NO_INTERFACE;
+      /*
+       * force an IP lookup in the table bound to the user's chosen
+       * source interface.
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+          ip6_fib_table_get_index_for_sw_if_index(sw_if_index);
     }
 
-  sw_if_index0 =
-    adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0,
-                              sw_if_index, verbose);
-  if ((~0 == sw_if_index0) && (~0 == sw_if_index))
+  if (~0 == sw_if_index)
     {
       vlib_buffer_free (vm, &bi0, 1);
       return SEND_PING_NO_INTERFACE;
     }
-  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
 
   h0 = vlib_buffer_get_current (p0);
 
@@ -344,7 +328,7 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
   h0->ip6.src_address = *pa6;
 
   /* Fill in the correct source now */
-  ip6_address_t *a = ip6_interface_first_address (im, sw_if_index0);
+  ip6_address_t *a = ip6_interface_first_address (im, sw_if_index);
   h0->ip6.src_address = a[0];
 
   /* Fill in icmp fields */
@@ -381,19 +365,17 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
 static send_ip46_ping_result_t
 send_ip4_ping (vlib_main_t * vm,
                ip4_main_t * im,
+               u32 table_id,
                ip4_address_t * pa4,
                u32 sw_if_index,
                u16 seq_host, u16 id_host, u16 data_len, u8 verbose)
 {
   icmp4_echo_request_header_t *h0;
   u32 bi0 = 0;
-  u32 sw_if_index0;
   ip_lookup_main_t *lm = &im->lookup_main;
-  u32 adj_index0;
   vlib_buffer_t *p0;
   vlib_frame_t *f;
   u32 *to_next;
-  u32 fib_index0;
   u32 if_add_index0;
 
   if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
@@ -401,28 +383,47 @@ send_ip4_ping (vlib_main_t * vm,
 
   p0 = vlib_get_buffer (vm, bi0);
 
-  /* Determine sw_if_index0 of the source intf, may be force-set via sw_if_index.  */
-  vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
-  vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;  /* use interface VRF */
-  fib_index0 = 0;
-  adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup(
-                                    ip4_fib_get(fib_index0), pa4, 32));
+  /*
+   * if the user did not provide a source interface, use the interface
+   * that the destination resolves via.
+   */
+  if (~0 == sw_if_index)
+  {
+      fib_node_index_t fib_entry_index;
+      u32 fib_index;
+
+      fib_index = ip4_fib_index_from_table_id(table_id);
 
-  if (ADJ_INDEX_INVALID == adj_index0)
+      if (~0 == fib_index)
+      {
+          vlib_buffer_free (vm, &bi0, 1);
+          return SEND_PING_NO_TABLE;
+      }
+
+      fib_entry_index = ip4_fib_table_lookup(ip4_fib_get(fib_index), pa4, 32);
+      sw_if_index = fib_entry_get_resolving_interface(fib_entry_index);
+      /*
+       * Set the TX interface to force ip-lookup to use the user's table ID
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+    }
+  else
     {
-      vlib_buffer_free (vm, &bi0, 1);
-      return SEND_PING_NO_INTERFACE;
+      /*
+       * force an IP lookup in the table bound to the user's chosen
+       * source interface.
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+          ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
     }
 
-  sw_if_index0 =
-    adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0,
-                              sw_if_index, verbose);
-  if ((~0 == sw_if_index0) && (~0 == sw_if_index))
+  if (~0 == sw_if_index)
     {
       vlib_buffer_free (vm, &bi0, 1);
       return SEND_PING_NO_INTERFACE;
     }
-  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
 
   h0 = vlib_buffer_get_current (p0);
 
@@ -439,7 +440,7 @@ send_ip4_ping (vlib_main_t * vm,
   h0->ip4.src_address = *pa4;
 
   /* Fill in the correct source now */
-  if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+  if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
   if (PREDICT_TRUE (if_add_index0 != ~0))
     {
       ip_interface_address_t *if_add =
@@ -532,7 +533,7 @@ print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0)
  */
 
 static void
-run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4,
+run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
                        ip6_address_t * pa6, u32 sw_if_index,
                        f64 ping_interval, u32 ping_repeat, u32 data_len,
                        u32 verbose)
@@ -571,14 +572,14 @@ run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4,
       pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
       pr->curr_seq = i;
       if (pa6 &&
-          (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, pa6,
+          (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6,
                                           sw_if_index, i, icmp_id, data_len,
                                           verbose)))
         {
           n_requests++;
         }
       if (pa4 &&
-          (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, pa4,
+          (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4,
                                           sw_if_index, i, icmp_id, data_len,
                                           verbose)))
         {
@@ -667,9 +668,12 @@ ping_ip_address (vlib_main_t * vm,
   u32 data_len = PING_DEFAULT_DATA_LEN;
   u32 verbose = 0;
   f64 ping_interval = PING_DEFAULT_INTERVAL;
+  u32 sw_if_index, table_id;
+
+  table_id = 0;
   ping_ip4 = ping_ip6 = 0;
-  u32 sw_if_index;
   sw_if_index = ~0;
+
   if (unformat (input, "%U", unformat_ip4_address, &a4))
     {
       ping_ip4 = 1;
@@ -757,6 +761,17 @@ ping_ip_address (vlib_main_t * vm,
               goto done;
             }
         }
+      else if (unformat (input, "table-id"))
+        {
+          if (!unformat (input, "%u", &table_id))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting table-id but got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
       else if (unformat (input, "interval"))
         {
           if (!unformat (input, "%f", &ping_interval))
@@ -791,7 +806,7 @@ ping_ip_address (vlib_main_t * vm,
         }
     }
 
-  run_ping_ip46_address (vm, ping_ip4 ? &a4 : NULL, ping_ip6 ? &a6 : NULL,
+  run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL, ping_ip6 ? &a6 : NULL,
                          sw_if_index, ping_interval, ping_repeat, data_len,
                          verbose);
 done:
@@ -844,7 +859,7 @@ VLIB_CLI_COMMAND (ping_command, static) =
 {
   .path = "ping",
   .function = ping_ip_address,
-  .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>} [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>] [size <pktsize>] [interval <sec>] [repeat <cnt>] [verbose]",
+  .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>} [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>] [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>] [verbose]",
 };
 /* *INDENT-ON* */
 
index 579638c..58c6f4b 100644 (file)
@@ -29,6 +29,7 @@ typedef enum {
   SEND_PING_OK = 0,
   SEND_PING_ALLOC_FAIL,
   SEND_PING_NO_INTERFACE,
+  SEND_PING_NO_TABLE,
 } send_ip46_ping_result_t;
 
 /*
index 2e38b0d..d294505 100644 (file)
@@ -1195,7 +1195,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
   .next_nodes = {
     [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
 #ifdef MAP_SKIP_IP6_LOOKUP
-    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit",
+    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite",
 #endif
     [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
     [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",