VPP-110: vxlan encap node should never touch a deleted tunnel 31/1431/1
authorJohn Lo <[email protected]>
Tue, 7 Jun 2016 13:14:07 +0000 (09:14 -0400)
committerJohn Lo <[email protected]>
Tue, 7 Jun 2016 13:14:07 +0000 (09:14 -0400)
Remove usage of the dummy encap string for output from a BD to a tunnel
which has been deleted. Instead, use a node l2-output-del-tunnel so that if
there are stale entries in the L2FIB for any deleted tunnel sw_if_index,
l2-output will send packets using such an entry to the l2-output-del-tunnel
node, which just sets up the proper drop reason before sending the packets
to the error-drop node to be dropped.

Change-Id: I590982ee25e924ab74e2855c55c58baf29a9fad4
Signed-off-by: John Lo <[email protected]>
vnet/vnet/l2/l2_output.c
vnet/vnet/l2/l2_output.h
vnet/vnet/vxlan/encap.c
vnet/vnet/vxlan/vxlan.c
vnet/vnet/vxlan/vxlan.h

index f5b2211..acca349 100644 (file)
@@ -62,31 +62,12 @@ static u8 * format_l2output_trace (u8 * s, va_list * args)
 }
 
 
-#define foreach_l2output_error                         \
-_(L2OUTPUT,     "L2 output packets")                   \
-_(EFP_DROP,     "L2 EFP filter pre-rewrite drops")     \
-_(VTR_DROP,     "L2 output tag rewrite drops")         \
-_(SHG_DROP,     "L2 split horizon drops")              \
-_(DROP,         "L2 output drops")
-
-typedef enum {
-#define _(sym,str) L2OUTPUT_ERROR_##sym,
-  foreach_l2output_error
-#undef _
-  L2OUTPUT_N_ERROR,
-} l2output_error_t;
-
 static char * l2output_error_strings[] = {
 #define _(sym,string) string,
   foreach_l2output_error
 #undef _
 };
 
-typedef enum {
-  L2OUTPUT_NEXT_DROP,
-  L2OUTPUT_N_NEXT,
-} l2output_next_t;
-
 // Return 0 if split horizon check passes, otherwise return non-zero
 // Packets should not be transmitted out an interface with the same
 // split-horizon group as the input interface, except if the shg is 0
@@ -411,9 +392,114 @@ VLIB_REGISTER_NODE (l2output_node,static) = {
   /* edit / add dispositions here */
   .next_nodes = {
         [L2OUTPUT_NEXT_DROP] = "error-drop",
+        [L2OUTPUT_NEXT_DEL_TUNNEL] = "l2-output-del-tunnel",
+  },
+};
+
+
+#define foreach_l2output_del_tunnel_error      \
+_(DROP,     "L2 output to deleted tunnel")
+// Error strings for the l2-output-del-tunnel node: one string per entry of the X-macro list above.
+static char * l2output_del_tunnel_error_strings[] = {
+#define _(sym,string) string,
+  foreach_l2output_del_tunnel_error
+#undef _
+};
+// Error codes generated from the same list, so each code indexes its string in the table above.
+typedef enum {
+#define _(sym,str) L2OUTPUT_DEL_TUNNEL_ERROR_##sym,
+  foreach_l2output_del_tunnel_error
+#undef _
+  L2OUTPUT_DEL_TUNNEL_N_ERROR, // last enumerator, hence the number of error codes
+} l2output_del_tunnel_error_t;
+
+
+// Output node for tunnels which were in L2 BDs but have been deleted.
+// On deletion of any tunnel which was on an L2 BD, its entry in the
+// l2output_main table next_nodes.output_node_index_vec[sw_if_index]
+// MUST be set to the value of L2OUTPUT_NEXT_DEL_TUNNEL. Thus, if there
+// are stale entries in the L2FIB for this tunnel sw_if_index, l2-output
+// will send packets for this sw_if_index to the l2-output-del-tunnel
+// node, which just sets up the proper drop reason before sending packets
+// to the error-drop node to drop the packet. This way, stale L2FIB entries
+// for deleted tunnels cannot cause packet or memory corruption.
+static vlib_node_registration_t l2output_del_tunnel_node;
+
+static uword
+l2output_del_tunnel_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{ // Marks every buffer of the frame with the del-tunnel DROP error and enqueues it on next arc 0.
+  u32 n_left_from, * from, * to_next;
+  l2output_next_t next_index = 0; // arc 0 is this node's only next node, registered as "error-drop"
+  // Returns frame->n_vectors, the number of packets in the input frame.
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors; // number of packets still to process
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      // get space to enqueue buffers to graph node "next_index";
+      vlib_get_next_frame (vm, node, next_index,
+                          to_next, n_left_to_next); // sets to_next and n_left_to_next (not assigned anywhere else here)
+      // Dual loop: copy two buffer indices per iteration and tag both buffers.
+      while (n_left_from >= 4 && n_left_to_next >= 2) // NOTE(review): nothing is prefetched here, so ">= 2" looks sufficient - confirm
+       {
+          u32 bi0, bi1;
+         vlib_buffer_t * b0, * b1;
+
+         to_next[0] = bi0 = from[0];
+         to_next[1] = bi1 = from[1];
+         from += 2;
+         to_next += 2;
+         n_left_from -= 2;
+         n_left_to_next -= 2;
+         b0 = vlib_get_buffer (vm, bi0);
+         b1 = vlib_get_buffer (vm, bi1);
+         b0->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP]; // drop reason: "L2 output to deleted tunnel"
+         b1->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP];
+        }
+      // Single loop: handle whatever the dual loop left over, one buffer at a time.
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+          u32 bi0;
+         vlib_buffer_t * b0;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+         b0 = vlib_get_buffer (vm, bi0);
+         b0->error = node->errors[L2OUTPUT_DEL_TUNNEL_ERROR_DROP];
+       }
+      // Done with this next frame; n_left_to_next is the number of slots left unused.
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (l2output_del_tunnel_node,static) = {
+  .function = l2output_del_tunnel_node_fn,
+  .name = "l2-output-del-tunnel", /* same name l2-output lists for its L2OUTPUT_NEXT_DEL_TUNNEL arc */
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  /* Error counters are described by the del-tunnel error string table. */
+  .n_errors =  ARRAY_LEN(l2output_del_tunnel_error_strings),
+  .error_strings = l2output_del_tunnel_error_strings,
+
+  .n_next_nodes = 1,
+  /* Single next arc, index 0; the node function enqueues every buffer on it. */
+  /* edit / add dispositions here */
+  .next_nodes = {
+       [0] = "error-drop",
   },
 };
 
+
 VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn)
 
 clib_error_t *l2output_init (vlib_main_t *vm)
index 1c7b033..b525a74 100644 (file)
@@ -104,6 +104,26 @@ typedef enum {
 #undef _
 } l2output_feat_masks_t;
            
+#define foreach_l2output_error                         \
+_(L2OUTPUT,     "L2 output packets")                   \
+_(EFP_DROP,     "L2 EFP filter pre-rewrite drops")     \
+_(VTR_DROP,     "L2 output tag rewrite drops")         \
+_(SHG_DROP,     "L2 split horizon drops")              \
+_(DROP,         "L2 output drops")
+// Indices into the l2-output node's next_nodes table.
+typedef enum {
+  L2OUTPUT_NEXT_DROP, // arc to "error-drop"
+  L2OUTPUT_NEXT_DEL_TUNNEL, // arc to "l2-output-del-tunnel"; stored in output_node_index_vec for deleted tunnels
+  L2OUTPUT_N_NEXT, // last enumerator, hence the number of arcs listed here
+} l2output_next_t;
+// Error codes for l2-output, generated from foreach_l2output_error in list order.
+typedef enum {
+#define _(sym,str) L2OUTPUT_ERROR_##sym,
+  foreach_l2output_error
+#undef _
+  L2OUTPUT_N_ERROR,
+} l2output_error_t;
+
 // Return an array of strings containing graph node names of each feature
 char **l2output_get_feat_names(void);
 
index 387a728..e7d49b0 100644 (file)
@@ -21,8 +21,7 @@
 
 /* Statistics (not all errors) */
 #define foreach_vxlan_encap_error    \
-_(ENCAPSULATED, "good packets encapsulated") \
-_(DEL_TUNNEL, "deleted tunnel packets")
+_(ENCAPSULATED, "good packets encapsulated")
 
 static char * vxlan_encap_error_strings[] = {
 #define _(sym,string) string,
@@ -160,22 +159,6 @@ vxlan_encap (vlib_main_t * vm,
           if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
           if (PREDICT_FALSE(!is_ip4_1)) next1 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
 
-         /* Check rewrite string and drop packet if tunnel is deleted */
-         if (PREDICT_FALSE(t0->rewrite == vxlan4_dummy_rewrite || 
-                            t0->rewrite == vxlan6_dummy_rewrite))
-           {
-             next0 = VXLAN_ENCAP_NEXT_DROP;
-             b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL];
-             pkts_encapsulated --;
-           }  /* Still go through normal encap with dummy rewrite */
-         if (PREDICT_FALSE(t1->rewrite == vxlan4_dummy_rewrite || 
-                            t1->rewrite == vxlan6_dummy_rewrite))
-           {
-             next1 = VXLAN_ENCAP_NEXT_DROP;
-             b1->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL];
-             pkts_encapsulated --;
-           }  /* Still go through normal encap with dummy rewrite */
-
          /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */
           /* IP6 VXLAN header sizeof(ip6_vxlan_header_t) should be 56 octects */
          if (PREDICT_TRUE(is_ip4_0))
@@ -418,16 +401,6 @@ vxlan_encap (vlib_main_t * vm,
 
           if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
 
-         /* Check rewrite string and drop packet if tunnel is deleted */
-         if (PREDICT_FALSE(t0->rewrite == vxlan4_dummy_rewrite || 
-                            t0->rewrite == vxlan6_dummy_rewrite))
-           {
-             next0 = VXLAN_ENCAP_NEXT_DROP;
-             b0->error = node->errors[VXLAN_ENCAP_ERROR_DEL_TUNNEL];
-             pkts_encapsulated --;
-           }  /* Still go through normal encap with dummy rewrite */
-
-
          /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octets */
           /* IP6 VXLAN header sizeof(ip4_vxlan_header_t) should be 56 octets */
          if (PREDICT_TRUE(is_ip4_0))
index da893d5..133fc6f 100644 (file)
@@ -207,6 +207,7 @@ int vnet_vxlan_add_del_tunnel
   int rv;
   vxlan4_tunnel_key_t key4;
   vxlan6_tunnel_key_t key6;
+  l2output_main_t * l2om = &l2output_main;
 
   if (!a->is_ip6) {
     key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
@@ -318,6 +319,15 @@ int vnet_vxlan_add_del_tunnel
          l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
          l2im->configs[sw_if_index].bd_index = 0;
        }
+      
+      /* 
+       * Directs the l2 output path to work out the interface
+       * output next-arc itself. Needed when recycling a tunnel.
+       */
+      vec_validate_init_empty(l2om->next_nodes.output_node_index_vec, 
+                              sw_if_index, ~0);
+      l2om->next_nodes.output_node_index_vec[t->sw_if_index] 
+        = ~0;
       vnet_sw_interface_set_flags (vnm, sw_if_index, 
                                    VNET_SW_INTERFACE_FLAG_ADMIN_UP);
       if (!a->is_ip6) {
@@ -343,24 +353,16 @@ int vnet_vxlan_add_del_tunnel
 
       vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
 
+      /* Direct the l2 output path to drop packets sent to this sw_if_index */
+      l2om->next_nodes.output_node_index_vec[t->sw_if_index] 
+        = L2OUTPUT_NEXT_DEL_TUNNEL;
+
       if (!a->is_ip6)
         hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
       else
         hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
 
       vec_free (t->rewrite);
-      if (!a->is_ip6)
-        {
-          t->rewrite = vxlan4_dummy_rewrite;
-          t->key4 = 0;
-        }
-      else
-        {
-          t->rewrite = vxlan6_dummy_rewrite;
-          clib_mem_free (t->key6);
-          t->key6 = 0;
-        }
-
       pool_put (vxm->tunnels, t);
     }
 
@@ -579,10 +581,6 @@ VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
 clib_error_t *vxlan_init (vlib_main_t *vm)
 {
   vxlan_main_t * vxm = &vxlan_main;
-  ip4_vxlan_header_t * hdr4;
-  ip4_header_t * ip4;
-  ip6_vxlan_header_t * hdr6;
-  ip6_header_t * ip6;
   
   vxm->vnet_main = vnet_get_main();
   vxm->vlib_main = vm;
@@ -592,21 +590,6 @@ clib_error_t *vxlan_init (vlib_main_t *vm)
         sizeof(vxlan6_tunnel_key_t),
         sizeof(uword));
 
-  /* init dummy rewrite string for deleted vxlan tunnels */
-  _vec_len(vxlan4_dummy_rewrite) = sizeof(ip4_vxlan_header_t);
-  hdr4 = (ip4_vxlan_header_t *) vxlan4_dummy_rewrite;
-  ip4 = &hdr4->ip4;
-  /* minimal rewrite setup, see vxlan_rewite() above as reference */
-  ip4->ip_version_and_header_length = 0x45;
-  ip4->checksum = ip4_header_checksum (ip4);
-
-  /* Same again for IPv6 */
-  _vec_len(vxlan6_dummy_rewrite) = sizeof(ip6_vxlan_header_t);
-  hdr6 = (ip6_vxlan_header_t *) vxlan6_dummy_rewrite;
-  ip6 = &hdr6->ip6;
-  /* minimal rewrite setup, see vxlan_rewite() above as reference */
-  ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28);
   udp_register_dst_port (vm, UDP_DST_PORT_vxlan, 
                          vxlan4_input_node.index, /* is_ip4 */ 1);
   udp_register_dst_port (vm, UDP_DST_PORT_vxlan6,
index 1c70c75..703741a 100644 (file)
@@ -20,6 +20,7 @@
 #include <vnet/vnet.h>
 #include <vnet/ip/ip.h>
 #include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
 #include <vnet/l2/l2_bd.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/vxlan/vxlan_packet.h>
@@ -126,13 +127,6 @@ typedef struct {
   /* Free vlib hw_if_indices */
   u32 * free_vxlan_tunnel_hw_if_indices;
 
-  /* Dummy rewrite for deleted vxlan_tunnels with hw_if_indices as above */
-  u64 dummy4_str [sizeof(ip4_vxlan_header_t)/sizeof(u64) + 2];
-#define vxlan4_dummy_rewrite ((u8 *) &vxlan_main.dummy4_str[1])
-
-  u64 dummy6_str [sizeof(ip6_vxlan_header_t)/sizeof(u64) + 2];
-#define vxlan6_dummy_rewrite ((u8 *) &vxlan_main.dummy6_str[1])
-
   /* Mapping from sw_if_index to tunnel index */
   u32 * tunnel_index_by_sw_if_index;