VXLAN tunnel encap forwarding optimization with FIB 2.0 17/3717/13
author John Lo <loj@cisco.com>
Mon, 7 Nov 2016 23:30:47 +0000 (18:30 -0500)
committer Damjan Marion <dmarion.lists@gmail.com>
Thu, 10 Nov 2016 12:45:00 +0000 (12:45 +0000)
Optimize the VXLAN encap node so that the vxlan4-encap node is used for
an IP4 underlay and the vxlan6-encap node is used for an IP6 underlay.
Also stack the VXLAN encap nodes on the appropriate FIB IP4 or IP6
load-balance node instead of the ip4/ip6-lookup node, which saves an IP
lookup operation.

For the VXLAN decap node, check the VXLAN header FLAGS field of each
packet and remove the code that supported decap-next for IP4 or IP6.
These decap-next values were intended for experimentation and are no
longer needed now that VXLAN-GPE tunnels are supported. The decap-next
field is still kept in the API for backward compatibility, but its value
has no effect. The decap next node for both the vxlan4-decap and
vxlan6-decap nodes is always the l2-input node.

Change-Id: I8ac95774946549ec403ab691f999df0c006b460f
Signed-off-by: John Lo <loj@cisco.com>
vnet/vnet/fib/fib_node.h
vnet/vnet/vxlan/decap.c
vnet/vnet/vxlan/encap.c
vnet/vnet/vxlan/vxlan.c
vnet/vnet/vxlan/vxlan.h
vnet/vnet/vxlan/vxlan_error.def
vnet/vnet/vxlan/vxlan_packet.h
vpp-api-test/vat/api_format.c
vpp/vpp-api/api.c

index c820546..791d63b 100644 (file)
@@ -38,6 +38,7 @@ typedef enum fib_node_type_t_ {
     FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
     FIB_NODE_TYPE_LISP_ADJ,
     FIB_NODE_TYPE_GRE_TUNNEL,
+    FIB_NODE_TYPE_VXLAN_TUNNEL,
     /**
      * Marker. New types before this one. leave the test last.
      */
@@ -57,6 +58,7 @@ typedef enum fib_node_type_t_ {
     [FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY] = "lisp-gpe-fwd-entry", \
     [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \
     [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \
+    [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel", \
 }
 
 /**
index 2b74ce2..812a841 100644 (file)
@@ -37,13 +37,13 @@ static u8 * format_vxlan_rx_trace (u8 * s, va_list * args)
 
   if (t->tunnel_index != ~0)
     {
-      s = format (s, "VXLAN: tunnel %d vni %d next %d error %d", 
+      s = format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
                   t->tunnel_index, t->vni, t->next_index, t->error);
     }
   else
     {
-      s = format (s, "VXLAN: no tunnel for vni %d next %d error %d", 
-                  t->vni, t->next_index, t->error);
+      s = format (s, "VXLAN decap error - tunnel for vni %d does not exist", 
+                 t->vni);
     }
   return s;
 }
@@ -129,11 +129,13 @@ vxlan_input (vlib_main_t * vm,
           vxlan0 = vlib_buffer_get_current (b0);
           vxlan1 = vlib_buffer_get_current (b1);
 
+         next0 = next1 = VXLAN_INPUT_NEXT_L2_INPUT;
+
           if (is_ip4) {
-          vlib_buffer_advance 
-            (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
-          vlib_buffer_advance 
-            (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+           vlib_buffer_advance
+             (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+           vlib_buffer_advance
+             (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
             ip4_0 = vlib_buffer_get_current (b0);
             ip4_1 = vlib_buffer_get_current (b1);
          } else {
@@ -149,10 +151,10 @@ vxlan_input (vlib_main_t * vm,
           if (is_ip4) {
             vlib_buffer_advance
               (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
-          vlib_buffer_advance 
+           vlib_buffer_advance
               (b1, sizeof(*ip4_1)+sizeof(udp_header_t)+sizeof(*vxlan1));
           } else {
-          vlib_buffer_advance 
+           vlib_buffer_advance
               (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
             vlib_buffer_advance
               (b1, sizeof(*ip6_1)+sizeof(udp_header_t)+sizeof(*vxlan1));
@@ -164,6 +166,13 @@ vxlan_input (vlib_main_t * vm,
           tunnel_index1 = ~0;
           error1 = 0;
 
+         if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+           {
+             error0 = VXLAN_ERROR_BAD_FLAGS;
+             next0 = VXLAN_INPUT_NEXT_DROP;
+             goto trace0;
+           }
+
           if (is_ip4) {
             key4_0.src = ip4_0->src_address.as_u32;
             key4_0.vni = vxlan0->vni_reserved;
@@ -209,7 +218,6 @@ vxlan_input (vlib_main_t * vm,
 
           t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
 
-          next0 = t0->decap_next_index;
           sw_if_index0 = t0->sw_if_index;
           len0 = vlib_buffer_length_in_chain (vm, b0);
 
@@ -253,6 +261,12 @@ vxlan_input (vlib_main_t * vm,
               tr->vni = vnet_get_vni (vxlan0);
             }
 
+         if (PREDICT_FALSE (vxlan1->flags != VXLAN_FLAGS_I))
+           {
+             error1 = VXLAN_ERROR_BAD_FLAGS;
+             next1 = VXLAN_INPUT_NEXT_DROP;
+             goto trace1;
+           }
 
           if (is_ip4) {
             key4_1.src = ip4_1->src_address.as_u32;
@@ -299,7 +313,6 @@ vxlan_input (vlib_main_t * vm,
 
           t1 = pool_elt_at_index (vxm->tunnels, tunnel_index1);
 
-          next1 = t1->decap_next_index;
           sw_if_index1 = t1->sw_if_index;
           len1 = vlib_buffer_length_in_chain (vm, b1);
 
@@ -376,9 +389,11 @@ vxlan_input (vlib_main_t * vm,
           /* udp leaves current_data pointing at the vxlan header */
           vxlan0 = vlib_buffer_get_current (b0);
 
+         next0 = VXLAN_INPUT_NEXT_L2_INPUT;
+
           if (is_ip4) {
-          vlib_buffer_advance 
-            (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+           vlib_buffer_advance
+             (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
             ip4_0 = vlib_buffer_get_current (b0);
           } else {
             vlib_buffer_advance
@@ -391,13 +406,20 @@ vxlan_input (vlib_main_t * vm,
             vlib_buffer_advance
               (b0, sizeof(*ip4_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
           } else {
-          vlib_buffer_advance 
+           vlib_buffer_advance
               (b0, sizeof(*ip6_0)+sizeof(udp_header_t)+sizeof(*vxlan0));
           }
 
           tunnel_index0 = ~0;
           error0 = 0;
 
+         if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+           {
+             error0 = VXLAN_ERROR_BAD_FLAGS;
+             next0 = VXLAN_INPUT_NEXT_DROP;
+             goto trace00;
+           }
+
           if (is_ip4) {
             key4_0.src = ip4_0->src_address.as_u32;
             key4_0.vni = vxlan0->vni_reserved;
@@ -443,7 +465,6 @@ vxlan_input (vlib_main_t * vm,
 
           t0 = pool_elt_at_index (vxm->tunnels, tunnel_index0);
 
-          next0 = t0->decap_next_index;
           sw_if_index0 = t0->sw_if_index;
           len0 = vlib_buffer_length_in_chain (vm, b0);
 
index e7d49b0..5b63064 100644 (file)
@@ -37,8 +37,6 @@ typedef enum {
 } vxlan_encap_error_t;
 
 typedef enum {
-    VXLAN_ENCAP_NEXT_IP4_LOOKUP,
-    VXLAN_ENCAP_NEXT_IP6_LOOKUP,
     VXLAN_ENCAP_NEXT_DROP,
     VXLAN_ENCAP_N_NEXT,
 } vxlan_encap_next_t;
@@ -55,7 +53,8 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args)
   vxlan_encap_trace_t * t 
       = va_arg (*args, vxlan_encap_trace_t *);
 
-  s = format (s, "VXLAN-ENCAP: tunnel %d vni %d", t->tunnel_index, t->vni);
+  s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d", 
+             t->tunnel_index, t->vni);
   return s;
 }
 
@@ -66,10 +65,11 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args)
 #define foreach_fixed_header6_offset            \
     _(0) _(1) _(2) _(3) _(4) _(5) _(6)
 
-static uword
-vxlan_encap (vlib_main_t * vm,
-               vlib_node_runtime_t * node,
-               vlib_frame_t * from_frame)
+always_inline uword
+vxlan_encap_inline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame,
+                   u32 is_ip4)
 {
   u32 n_left_from, next_index, * from, * to_next;
   vxlan_main_t * vxm = &vxlan_main;
@@ -79,6 +79,10 @@ vxlan_encap (vlib_main_t * vm,
   u16 old_l0 = 0, old_l1 = 0;
   u32 cpu_index = os_get_cpu_number();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+  u32 sw_if_index0 = 0, sw_if_index1 = 0;
+  u32 next0 = 0, next1 = 0;
+  vnet_hw_interface_t * hi0, * hi1;
+  vxlan_tunnel_t * t0 = NULL, * t1 = NULL;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -99,10 +103,7 @@ vxlan_encap (vlib_main_t * vm,
           u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
           u32 flow_hash0, flow_hash1;
-         u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-          u32 next1 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-         u32 sw_if_index0, sw_if_index1, len0, len1;
-          vnet_hw_interface_t * hi0, * hi1;
+         u32 len0, len1;
           ip4_header_t * ip4_0, * ip4_1;
           ip6_header_t * ip6_0, * ip6_1;
           udp_header_t * udp0, * udp1;
@@ -110,10 +111,8 @@ vxlan_encap (vlib_main_t * vm,
           u64 * copy_src1, * copy_dst1;
           u32 * copy_src_last0, * copy_dst_last0;
           u32 * copy_src_last1, * copy_dst_last1;
-          vxlan_tunnel_t * t0, * t1;
           u16 new_l0, new_l1;
           ip_csum_t sum0, sum1;
-          u8 is_ip4_0, is_ip4_1;
 
          /* Prefetch next iteration. */
          {
@@ -144,169 +143,147 @@ vxlan_encap (vlib_main_t * vm,
           flow_hash0 = vnet_l2_compute_flow_hash (b0);
           flow_hash1 = vnet_l2_compute_flow_hash (b1);
 
-          /* 1-wide cache? */
-         sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-         sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
-          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
-         hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); 
-
-          t0 = &vxm->tunnels[hi0->dev_instance];
-          t1 = &vxm->tunnels[hi1->dev_instance];
-
-          is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4);
-          is_ip4_1 = (t1->flags & VXLAN_TUNNEL_IS_IPV4);
-
-          if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
-          if (PREDICT_FALSE(!is_ip4_1)) next1 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+             hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+             t0 = &vxm->tunnels[hi0->dev_instance];
+             /* Note: change to always set next0 if it may be set to drop */
+             next0 = t0->next_dpo.dpoi_next_node;
+           }
+          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
 
-         /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */
-          /* IP6 VXLAN header sizeof(ip6_vxlan_header_t) should be 56 octects */
-         if (PREDICT_TRUE(is_ip4_0))
-            ASSERT(vec_len(t0->rewrite) == 36);
-          else
-            ASSERT(vec_len(t0->rewrite) == 56);
-          if (PREDICT_TRUE(is_ip4_1))
-            ASSERT(vec_len(t1->rewrite) == 36);
-          else
-            ASSERT(vec_len(t1->rewrite) == 56);
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index1 != vnet_buffer(b1)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+             hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); 
+             t1 = &vxm->tunnels[hi1->dev_instance];
+             /* Note: change to always set next1 if it may be set to drop */
+             next1 = t1->next_dpo.dpoi_next_node;
+           }
+          vnet_buffer(b1)->ip.adj_index[VLIB_TX] = t1->next_dpo.dpoi_index;
 
           /* Apply the rewrite string. $$$$ vnet_rewrite? */
           vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
           vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
 
-          /* assign both v4 and v6; avoid a branch, optimizer will help us */
-          ip4_0 = vlib_buffer_get_current(b0);
-          ip6_0 = (void *)ip4_0;
-          ip4_1 = vlib_buffer_get_current(b1);
-          ip6_1 = (void *)ip4_1;
-
-          /* Copy the fixed header (v4 and v6 variables point to the same
-           * place at this point)
-           */
-          copy_dst0 = (u64 *) ip4_0;
-          copy_src0 = (u64 *) t0->rewrite;
-
-          copy_dst1 = (u64 *) ip4_1;
-          copy_src1 = (u64 *) t1->rewrite;
-
-          /* Copy first 32 (ip4)/56 (ip6) octets 8-bytes at a time */
+         if (is_ip4)
+           {
+             /* IP4 VXLAN header should be 36 octects */
+              ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+             ASSERT(vec_len(t1->rewrite) == sizeof(ip4_vxlan_header_t));
+
+             ip4_0 = vlib_buffer_get_current(b0);
+             ip4_1 = vlib_buffer_get_current(b1);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip4_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             copy_dst1 = (u64 *) ip4_1;
+             copy_src1 = (u64 *) t1->rewrite;
+             /* Copy first 32 octets 8-bytes at a time */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
-          if (PREDICT_TRUE(is_ip4_0)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header4_offset;
 #undef _
 #define _(offs) copy_dst1[offs] = copy_src1[offs];
-          if (PREDICT_TRUE(is_ip4_1)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header4_offset;
 #undef _
-          /* Last 4 octets. Hopefully gcc will be our friend */
-          if (PREDICT_TRUE(is_ip4_0)) {
+             /* Last 4 octets. Hopefully gcc will be our friend */
               copy_dst_last0 = (u32 *)(&copy_dst0[4]);
               copy_src_last0 = (u32 *)(&copy_src0[4]);
               copy_dst_last0[0] = copy_src_last0[0];
-          }
-          if (PREDICT_TRUE(is_ip4_1)) {
               copy_dst_last1 = (u32 *)(&copy_dst1[4]);
               copy_src_last1 = (u32 *)(&copy_src1[4]);
               copy_dst_last1[0] = copy_src_last1[0];
-          }
 
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum0 = ip4_0->checksum;
-
-            /* old_l0 always 0, see the rewrite setup */
-            new_l0 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+             /* Fix the IP4 checksum and length */
+             sum0 = ip4_0->checksum;
+             new_l0 = /* old_l0 always 0, see the rewrite setup */ 
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
               sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
-                                   length /* changed member */);
-            ip4_0->checksum = ip_csum_fold (sum0);
-            ip4_0->length = new_l0;
-          } else {
-            new_l0 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof(*ip6_0));
-            ip6_0->payload_length = new_l0;
-          }
-
-          if (PREDICT_TRUE(is_ip4_1)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum1 = ip4_1->checksum;
-
-            /* old_l1 always 0, see the rewrite setup */
-            new_l1 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+                                    length /* changed member */);
+             ip4_0->checksum = ip_csum_fold (sum0);
+             ip4_0->length = new_l0;
+             sum1 = ip4_1->checksum;
+             new_l1 = /* old_l1 always 0, see the rewrite setup */
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
               sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
-                                   length /* changed member */);
-            ip4_1->checksum = ip_csum_fold (sum1);
-            ip4_1->length = new_l1;
-          } else {
-            new_l1 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof(*ip6_1));
-            ip6_1->payload_length = new_l1;
-          }
-          
-          /* Fix UDP length */
-          if (PREDICT_TRUE(is_ip4_0)) {
-            udp0 = (udp_header_t *)(ip4_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip4_0));
-          } else {
-            udp0 = (udp_header_t *)(ip6_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip6_0));
-          }
-          if (PREDICT_TRUE(is_ip4_1)) {
-            udp1 = (udp_header_t *)(ip4_1+1);
-            new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof (*ip4_1));
-          } else {
-            udp1 = (udp_header_t *)(ip6_1+1);
-            new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof (*ip6_1));
-          }
-          
-          udp0->length = new_l0;
-          udp0->src_port = flow_hash0;
-
-          udp1->length = new_l1;
-          udp1->src_port = flow_hash1;
-
-          if (PREDICT_FALSE(!is_ip4_0)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
-                                                        ip6_0, &bogus);
-                ASSERT(bogus == 0);
-                if (udp0->checksum == 0)
-                    udp0->checksum = 0xffff;
-          }
-
-          if (PREDICT_FALSE(!is_ip4_1)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
+                                    length /* changed member */);
+             ip4_1->checksum = ip_csum_fold (sum1);
+             ip4_1->length = new_l1;
+
+             /* Fix UDP length and set source port */
+             udp0 = (udp_header_t *)(ip4_0+1);
+             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+                                            - sizeof (*ip4_0));
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+             udp1 = (udp_header_t *)(ip4_1+1);
+             new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+                                            - sizeof (*ip4_1));
+             udp1->length = new_l1;
+             udp1->src_port = flow_hash1;
+           }
+         else /* ipv6 */
+           {
+              int bogus = 0;
+
+             /* IP6 VXLAN header should be 56 octects */
+              ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+             ASSERT(vec_len(t1->rewrite) == sizeof(ip6_vxlan_header_t));
+             ip6_0 = vlib_buffer_get_current(b0);
+             ip6_1 = vlib_buffer_get_current(b1);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip6_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             copy_dst1 = (u64 *) ip6_1;
+             copy_src1 = (u64 *) t1->rewrite;
+             /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+             foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+             foreach_fixed_header6_offset;
+#undef _
+             /* Fix IP6 payload length */
+             new_l0 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+                                     - sizeof(*ip6_0));
+             ip6_0->payload_length = new_l0;
+             new_l1 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+                                     - sizeof(*ip6_1));
+             ip6_1->payload_length = new_l1;
+
+             /* Fix UDP length  and set source port */
+             udp0 = (udp_header_t *)(ip6_0+1);
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+             udp1 = (udp_header_t *)(ip6_1+1);
+             udp1->length = new_l1;
+             udp1->src_port = flow_hash1;
+
+             /* IPv6 UDP checksum is mandatory */
+             udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+                                                                ip6_0, &bogus);
+             ASSERT(bogus == 0);
+             if (udp0->checksum == 0)
+               udp0->checksum = 0xffff;
+             udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
                                                         ip6_1, &bogus);
-                ASSERT(bogus == 0);
-                if (udp1->checksum == 0)
-                    udp1->checksum = 0xffff;
-          }
-
-          /* Reset to look up tunnel partner in the configured FIB */
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
-          vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
-          vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
-          pkts_encapsulated += 2;
+             ASSERT(bogus == 0);
+             if (udp1->checksum == 0)
+               udp1->checksum = 0xffff;
+           }
 
+          pkts_encapsulated += 2;
          len0 = vlib_buffer_length_in_chain (vm, b0);
-         len1 = vlib_buffer_length_in_chain (vm, b0);
+         len1 = vlib_buffer_length_in_chain (vm, b1);
          stats_n_packets += 2;
          stats_n_bytes += len0 + len1;
 
@@ -367,18 +344,14 @@ vxlan_encap (vlib_main_t * vm,
          u32 bi0;
          vlib_buffer_t * b0;
           u32 flow_hash0;
-         u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-         u32 sw_if_index0, len0;
-          vnet_hw_interface_t * hi0;
+         u32 len0;
           ip4_header_t * ip4_0;
           ip6_header_t * ip6_0;
           udp_header_t * udp0;
           u64 * copy_src0, * copy_dst0;
           u32 * copy_src_last0, * copy_dst_last0;
-          vxlan_tunnel_t * t0;
           u16 new_l0;
           ip_csum_t sum0;
-          u8 is_ip4_0;
 
          bi0 = from[0];
          to_next[0] = bi0;
@@ -391,102 +364,91 @@ vxlan_encap (vlib_main_t * vm,
 
           flow_hash0 = vnet_l2_compute_flow_hash(b0);
 
-          /* 1-wide cache? */
-         sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
-
-          t0 = &vxm->tunnels[hi0->dev_instance];
-
-          is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4);
-
-          if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
-
-         /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octets */
-          /* IP6 VXLAN header sizeof(ip4_vxlan_header_t) should be 56 octets */
-         if (PREDICT_TRUE(is_ip4_0))
-            ASSERT(vec_len(t0->rewrite) == 36);
-          else
-            ASSERT(vec_len(t0->rewrite) == 56);
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+             hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+             t0 = &vxm->tunnels[hi0->dev_instance];
+             /* Note: change to always set next0 if it may be set to drop */
+             next0 = t0->next_dpo.dpoi_next_node;
+           }
+         vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
 
           /* Apply the rewrite string. $$$$ vnet_rewrite? */
           vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
 
-          /* assign both v4 and v6; avoid a branch, optimizer will help us */
-          ip4_0 = vlib_buffer_get_current(b0);
-          ip6_0 = (void *)ip4_0;
+         if (is_ip4)
+           {
+             /* IP4 VXLAN header should be 36 octects */
+              ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+             ip4_0 = vlib_buffer_get_current(b0);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip4_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+             foreach_fixed_header4_offset;
+#undef _
+             /* Last 4 octets. Hopefully gcc will be our friend */
+              copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+              copy_src_last0 = (u32 *)(&copy_src0[4]);
+              copy_dst_last0[0] = copy_src_last0[0];
 
-          /* Copy the fixed header (v4 and v6 variables point to the same
-           * place at this point)
-           */
-          copy_dst0 = (u64 *) ip4_0;
-          copy_src0 = (u64 *) t0->rewrite;
+             /* Fix the IP4 checksum and length */
+             sum0 = ip4_0->checksum;
+             new_l0 = /* old_l0 always 0, see the rewrite setup */ 
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+              sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+                                    length /* changed member */);
+             ip4_0->checksum = ip_csum_fold (sum0);
+             ip4_0->length = new_l0;
+
+             /* Fix UDP length and set source port */
+             udp0 = (udp_header_t *)(ip4_0+1);
+             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+                                            - sizeof (*ip4_0));
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+           }
 
-          /* Copy first 32 octets 8-bytes at a time */
+         else /* ip6 path */
+           {
+              int bogus = 0;
+
+             /* IP6 VXLAN header should be 56 octects */
+              ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+             ip6_0 = vlib_buffer_get_current(b0);
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip6_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             /* Copy first 56 (ip6) octets 8-bytes at a time */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
-          if (PREDICT_TRUE(is_ip4_0)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header6_offset;
 #undef _
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* Last 4 octets. Hopefully gcc will be our friend */
-            copy_dst_last0 = (u32 *)(&copy_dst0[4]);
-            copy_src_last0 = (u32 *)(&copy_src0[4]);
-          
-            copy_dst_last0[0] = copy_src_last0[0];
-          }
-
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum0 = ip4_0->checksum;
-
-            /* old_l0 always 0, see the rewrite setup */
-            new_l0 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
-              sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
-                                 length /* changed member */);
-            ip4_0->checksum = ip_csum_fold (sum0);
-            ip4_0->length = new_l0;
-          } else {
-            new_l0 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof(*ip6_0));
-            ip6_0->payload_length = new_l0;
-          }
-          
-          /* Fix UDP length */
-          if (PREDICT_TRUE(is_ip4_0)) {
-            udp0 = (udp_header_t *)(ip4_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip4_0));
-          } else {
-            udp0 = (udp_header_t *)(ip6_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip6_0));
-          }
-          
-          udp0->length = new_l0;
-          udp0->src_port = flow_hash0;
-
-          if (PREDICT_FALSE(!is_ip4_0)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
-                                                        ip6_0, &bogus);
-                ASSERT(bogus == 0);
-                if (udp0->checksum == 0)
-                    udp0->checksum = 0xffff;
-          }
-
-
-          /* vnet_update_l2_len (b0);  do we need this? cluke */
-
-          /* Reset to look up tunnel partner in the configured FIB */
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
-          pkts_encapsulated ++;
+             /* Fix IP6 payload length */
+             new_l0 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+                                     - sizeof(*ip6_0));
+             ip6_0->payload_length = new_l0;
+
+             /* Fix UDP length  and set source port */
+             udp0 = (udp_header_t *)(ip6_0+1);
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+
+             /* IPv6 UDP checksum is mandatory */
+             udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+                                                                ip6_0, &bogus);
+             ASSERT(bogus == 0);
+             if (udp0->checksum == 0)
+               udp0->checksum = 0xffff;
+           }
 
+          pkts_encapsulated ++;
          len0 = vlib_buffer_length_in_chain (vm, b0);
          stats_n_packets += 1;
          stats_n_bytes += len0;
@@ -541,24 +503,51 @@ vxlan_encap (vlib_main_t * vm,
   return from_frame->n_vectors;
 }
 
-VLIB_REGISTER_NODE (vxlan_encap_node) = {
-  .function = vxlan_encap,
-  .name = "vxlan-encap",
+static uword
+vxlan4_encap (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+static uword
+vxlan6_encap (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (vxlan4_encap_node) = {
+  .function = vxlan4_encap,
+  .name = "vxlan4-encap",
   .vector_size = sizeof (u32),
   .format_trace = format_vxlan_encap_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-
   .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
   .error_strings = vxlan_encap_error_strings,
-
   .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+  .next_nodes = {
+        [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+  },
+};
 
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_encap_node, vxlan4_encap)
+
+VLIB_REGISTER_NODE (vxlan6_encap_node) = {
+  .function = vxlan6_encap,
+  .name = "vxlan6-encap",
+  .vector_size = sizeof (u32),
+  .format_trace = format_vxlan_encap_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+  .error_strings = vxlan_encap_error_strings,
+  .n_next_nodes = VXLAN_ENCAP_N_NEXT,
   .next_nodes = {
-        [VXLAN_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
-        [VXLAN_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
         [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (vxlan_encap_node, vxlan_encap)
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_encap_node, vxlan6_encap)
 
index 9ec4c74..d37e9d6 100644 (file)
@@ -14,6 +14,8 @@
  */
 #include <vnet/vxlan/vxlan.h>
 #include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
 
 /**
  * @file
 
 vxlan_main_t vxlan_main;
 
-static u8 * format_decap_next (u8 * s, va_list * args)
-{
-  u32 next_index = va_arg (*args, u32);
-
-  switch (next_index)
-    {
-    case VXLAN_INPUT_NEXT_DROP:
-      return format (s, "drop");
-    case VXLAN_INPUT_NEXT_L2_INPUT:
-      return format (s, "l2");
-    case VXLAN_INPUT_NEXT_IP4_INPUT:
-      return format (s, "ip4");
-    case VXLAN_INPUT_NEXT_IP6_INPUT:
-      return format (s, "ip6");
-    default:
-      return format (s, "unknown %d", next_index);
-    }
-  return s;
-}
-
 u8 * format_vxlan_tunnel (u8 * s, va_list * args)
 {
   vxlan_tunnel_t * t = va_arg (*args, vxlan_tunnel_t *);
   vxlan_main_t * ngm = &vxlan_main;
 
   s = format (s, 
-              "[%d] %U (src) %U (dst) vni %d encap_fib_index %d",
+             "[%d] src %U dst %U vni %d encap_fib_index %d sw_if_index %d "
+             "fib_entry_index %d\n",
               t - ngm->tunnels,
               format_ip46_address, &t->src, IP46_TYPE_ANY,
               format_ip46_address, &t->dst, IP46_TYPE_ANY,
-              t->vni,
-              t->encap_fib_index);
-  s = format (s, " decap_next %U\n", format_decap_next, t->decap_next_index);
+              t->vni,  t->encap_fib_index, t->sw_if_index, t->fib_entry_index);
   return s;
 }
 
@@ -116,10 +97,85 @@ VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
   .build_rewrite = default_build_rewrite,
 };
 
+/* Map a fib_node_t back to the vxlan_tunnel_t that embeds it as 'node'. */
+static vxlan_tunnel_t *
+vxlan_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+    ASSERT(FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
+#endif
+    return ((vxlan_tunnel_t*) (((char*)node) -
+                              STRUCT_OFFSET_OF(vxlan_tunnel_t, node)));
+}
+
+/**
+ * FIB back-walk callback: re-fetch the forwarding DPO of the tunnel's FIB
+ * entry and re-stack t->next_dpo on it from the vxlan4/vxlan6 encap node.
+ */
+static fib_node_back_walk_rc_t
+vxlan_tunnel_back_walk (fib_node_t *node,
+                       fib_node_back_walk_ctx_t *ctx)
+{
+    vxlan_tunnel_t *t = vxlan_tunnel_from_fib_node(node);
+    dpo_id_t dpo = DPO_INVALID;
+
+    if (ip46_address_is_ip4(&t->dst)) { /* IP4 dst: stack from vxlan4-encap */
+       fib_entry_contribute_forwarding
+           (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo);
+       dpo_stack_from_node
+           (vxlan4_encap_node.index, &t->next_dpo, &dpo);
+    } else { /* otherwise IP6 dst: stack from vxlan6-encap */
+       fib_entry_contribute_forwarding
+           (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP6, &dpo);
+       dpo_stack_from_node
+           (vxlan6_encap_node.index, &t->next_dpo, &dpo);
+    }
+    dpo_reset(&dpo);
+
+    return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Return the fib_node_t embedded in the tunnel at pool index 'index'.
+ */
+static fib_node_t*
+vxlan_tunnel_fib_node_get (fib_node_index_t index)
+{
+    vxlan_tunnel_t * t;
+    vxlan_main_t * vxm = &vxlan_main;
+
+    t = pool_elt_at_index(vxm->tunnels, index);
+
+    return (&t->node);
+}
+
+/**
+ * Last-lock-gone callback; not expected to be invoked, so it only asserts.
+ */
+static void
+vxlan_tunnel_last_lock_gone (fib_node_t *node)
+{
+    /*
+     * The VXLAN tunnel is a root of the graph. As such
+     * it never has children and thus is never locked.
+     */
+    ASSERT(0);
+}
+
+/*
+ * FIB-node virtual function table for VXLAN tunnels (get, last-lock and
+ * back-walk callbacks) so they can participate in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_vft = {
+    .fnv_get = vxlan_tunnel_fib_node_get,
+    .fnv_last_lock = vxlan_tunnel_last_lock_gone,
+    .fnv_back_walk = vxlan_tunnel_back_walk,
+};
+
+
 #define foreach_copy_field                      \
 _(vni)                                          \
-_(encap_fib_index)                              \
-_(decap_next_index)
+_(encap_fib_index)
 
 #define foreach_copy_ipv4 {                     \
   _(src.ip4.as_u32)                             \
@@ -205,8 +261,6 @@ int vnet_vxlan_add_del_tunnel
   vxlan_main_t * vxm = &vxlan_main;
   vxlan_tunnel_t *t = 0;
   vnet_main_t * vnm = vxm->vnet_main;
-  ip4_main_t * im4 = &ip4_main;
-  ip6_main_t * im6 = &ip6_main;
   vnet_hw_interface_t * hi;
   uword * p;
   u32 hw_if_index = ~0;
@@ -214,8 +268,9 @@ int vnet_vxlan_add_del_tunnel
   int rv;
   vxlan4_tunnel_key_t key4;
   vxlan6_tunnel_key_t key6;
+  u32 is_ip6 = a->is_ip6;
 
-  if (!a->is_ip6) {
+  if (!is_ip6) {
     key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
     key4.vni = clib_host_to_net_u32 (a->vni << 8);
   
@@ -230,28 +285,24 @@ int vnet_vxlan_add_del_tunnel
   
   if (a->is_add)
     {
+      l2input_main_t * l2im = &l2input_main;
+
       /* adding a tunnel: tunnel must not already exist */
       if (p)
         return VNET_API_ERROR_TUNNEL_EXIST;
 
-      if (a->decap_next_index == ~0)
-         a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
-
-      if (a->decap_next_index >= VXLAN_INPUT_N_NEXT)
-        return VNET_API_ERROR_INVALID_DECAP_NEXT;
-      
       pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
       memset (t, 0, sizeof (*t));
       
       /* copy from arg structure */
 #define _(x) t->x = a->x;
       foreach_copy_field;
-      if (!a->is_ip6) foreach_copy_ipv4
-      else            foreach_copy_ipv6
+      if (!is_ip6) foreach_copy_ipv4
+      else         foreach_copy_ipv6
 #undef _
       
       /* copy the key */
-      if (a->is_ip6)
+      if (is_ip6)
         {
           t->key6 = clib_mem_alloc (sizeof(vxlan6_tunnel_key_t));
           clib_memcpy (t->key6, &key6, sizeof(key6));
@@ -261,9 +312,7 @@ int vnet_vxlan_add_del_tunnel
           t->key4 = 0; /* not yet used */
         }
 
-      if (!a->is_ip6) t->flags |= VXLAN_TUNNEL_IS_IPV4;
-
-      if (!a->is_ip6) {
+      if (!is_ip6) {
         rv = vxlan4_rewrite (t);
       } else {
         rv = vxlan6_rewrite (t);
@@ -275,7 +324,7 @@ int vnet_vxlan_add_del_tunnel
           return rv;
         }
 
-      if (!a->is_ip6)
+      if (!is_ip6)
         hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
       else
         hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels);
@@ -308,7 +357,6 @@ int vnet_vxlan_add_del_tunnel
             (vnm, vxlan_device_class.index, t - vxm->tunnels,
              vxlan_hw_class.index, t - vxm->tunnels);
           hi = vnet_get_hw_interface (vnm, hw_if_index);
-          hi->output_node_index = vxlan_encap_node.index;
         }
       
       t->hw_if_index = hw_if_index;
@@ -317,26 +365,73 @@ int vnet_vxlan_add_del_tunnel
       vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
       vxm->tunnel_index_by_sw_if_index[sw_if_index] = t - vxm->tunnels;
 
-      if (a->decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
-        {
-         l2input_main_t * l2im = &l2input_main;
-         /* setup l2 input config with l2 feature and bd 0 to drop packet */
-         vec_validate (l2im->configs, sw_if_index);
-         l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
-         l2im->configs[sw_if_index].bd_index = 0;
-       }
+      /* setup l2 input config with l2 feature and bd 0 to drop packet */
+      vec_validate (l2im->configs, sw_if_index);
+      l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+      l2im->configs[sw_if_index].bd_index = 0;
       
       vnet_sw_interface_set_flags (vnm, sw_if_index, 
                                    VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-      if (!a->is_ip6) {
-        vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
-        im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
-        ip4_sw_interface_enable_disable(sw_if_index, 1);
-      } else {
-        vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
-        im6->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index;
-        ip6_sw_interface_enable_disable(sw_if_index, 1);
-      }
+      /*
+       * source the FIB entry for the tunnel's destination
+       * and become a child thereof. The tunnel will then get poked
+       * when the forwarding for the entry updates, and the tunnel can
+       * re-stack accordingly
+       */
+      fib_node_init(&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
+      if (!is_ip6)
+        {
+         dpo_id_t dpo = DPO_INVALID;
+         const fib_prefix_t tun_dst_pfx = 
+           {
+             .fp_len = 32,
+             .fp_proto = FIB_PROTOCOL_IP4,
+             .fp_addr = 
+               {
+                 .ip4 = t->dst.ip4,
+               }
+           };
+
+         t->fib_entry_index = fib_table_entry_special_add
+           (t->encap_fib_index, &tun_dst_pfx,
+            FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
+         t->sibling_index = fib_entry_child_add
+           (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
+         fib_entry_contribute_forwarding
+           (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo);
+         dpo_stack_from_node (vxlan4_encap_node.index, &t->next_dpo, &dpo);
+         dpo_reset(&dpo);
+
+         /* Set vxlan tunnel output node to ip4 version */
+         hi->output_node_index = vxlan4_encap_node.index;
+        }
+      else
+        {
+         dpo_id_t dpo = DPO_INVALID;
+         const fib_prefix_t tun_dst_pfx = 
+           {
+             .fp_len = 128,
+             .fp_proto = FIB_PROTOCOL_IP6,
+             .fp_addr = 
+               {
+                 .ip6 = t->dst.ip6,
+               }
+           };
+         t->fib_entry_index = fib_table_entry_special_add
+           (t->encap_fib_index, &tun_dst_pfx,
+            FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
+         t->sibling_index = fib_entry_child_add
+           (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
+         fib_entry_contribute_forwarding
+           (t->fib_entry_index, FIB_FORW_CHAIN_TYPE_UNICAST_IP6, &dpo);
+         dpo_stack_from_node
+           (vxlan6_encap_node.index, &t->next_dpo, &dpo);
+         dpo_reset(&dpo);
+
+         /* Set vxlan tunnel output node to ip6 version */
+         hi->output_node_index = vxlan6_encap_node.index;
+        }
     }
   else
     {
@@ -353,16 +448,18 @@ int vnet_vxlan_add_del_tunnel
 
       vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
 
-      if (!a->is_ip6)
+      fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
+      fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
+      fib_node_deinit(&t->node);
+
+      if (!is_ip6)
         {
           hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
-          ip4_sw_interface_enable_disable(t->sw_if_index, 1);
        }
       else
         {
          hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
          clib_mem_free (t->key6);
-          ip6_sw_interface_enable_disable(t->sw_if_index, 1);
        }
       vec_free (t->rewrite);
       pool_put (vxm->tunnels, t);
@@ -405,12 +502,6 @@ static uword unformat_decap_next (unformat_input_t * input, va_list * args)
   
   if (unformat (input, "l2"))
     *result = VXLAN_INPUT_NEXT_L2_INPUT;
-  else if (unformat (input, "drop"))
-    *result = VXLAN_INPUT_NEXT_DROP;
-  else if (unformat (input, "ip4"))
-    *result = VXLAN_INPUT_NEXT_IP4_INPUT;
-  else if (unformat (input, "ip6"))
-    *result = VXLAN_INPUT_NEXT_IP6_INPUT;
   else if (unformat (input, "%d", &tmp))
     *result = tmp;
   else
@@ -528,10 +619,9 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
     {
     case 0:
       if (is_add)
-        vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
+        vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, 
+                       vnet_get_main(), sw_if_index);
       break;
-    case VNET_API_ERROR_INVALID_DECAP_NEXT:
-      return clib_error_return (0, "invalid decap-next...");
 
     case VNET_API_ERROR_TUNNEL_EXIST:
       return clib_error_return (0, "tunnel already exists...");
@@ -563,7 +653,7 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
  *
  * @cliexpar
  * Example of how to create a VXLAN Tunnel:
- * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7 decap-next l2}
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7}
  * Example of how to delete a VXLAN Tunnel:
  * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
  ?*/
@@ -572,7 +662,7 @@ VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
   .path = "create vxlan tunnel",
   .short_help = 
   "create vxlan tunnel src <local-vtep-addr> dst <remote-vtep-addr> vni <nn>" 
-  " [encap-vrf-id <nn>] [decap-next [l2|ip4|ip6]] [del]",
+  " [encap-vrf-id <nn>]",
   .function = vxlan_add_del_tunnel_command_fn,
 };
 /* *INDENT-ON* */
@@ -602,7 +692,7 @@ show_vxlan_tunnel_command_fn (vlib_main_t * vm,
  * @cliexpar
  * Example of how to display the VXLAN Tunnel entries:
  * @cliexstart{show vxlan tunnel}
- * [0] 10.0.3.1 (src) 10.0.3.3 (dst) vni 13 encap_fib_index 1 decap_next l2
+ * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 fib_entry_index 7
  * @cliexend
  ?*/
 /* *INDENT-OFF* */
@@ -630,6 +720,9 @@ clib_error_t *vxlan_init (vlib_main_t *vm)
                          vxlan4_input_node.index, /* is_ip4 */ 1);
   udp_register_dst_port (vm, UDP_DST_PORT_vxlan6,
                          vxlan6_input_node.index, /* is_ip4 */ 0);
+
+  fib_node_register_type(FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
+
   return 0;
 }
 
index 703741a..e37f09a 100644 (file)
@@ -27,6 +27,7 @@
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/ip6_packet.h>
 #include <vnet/ip/udp.h>
+#include <vnet/dpo/dpo.h>
 
 typedef CLIB_PACKED (struct {
   ip4_header_t ip4;            /* 20 bytes */
@@ -67,40 +68,49 @@ typedef struct {
   /* Rewrite string. $$$$ embed vnet_rewrite header */
   u8 * rewrite;
 
-  /* tunnel src and dst addresses */
-  ip46_address_t src;
-  ip46_address_t dst;
+  /* FIB DPO for IP forwarding of VXLAN encap packet */
+  dpo_id_t next_dpo;  
+
+  /* storage for the hash key */
+  union {
+    vxlan4_tunnel_key_t *key4; /* unused for now */
+    vxlan6_tunnel_key_t *key6;
+  };
 
   /* vxlan VNI in HOST byte order */
   u32 vni;
 
-  /* decap next index */
-  u32 decap_next_index;
+  /* tunnel src and dst addresses */
+  ip46_address_t src;
+  ip46_address_t dst;
 
-  /* L3 FIB index and L2 BD ID */
-  u16 encap_fib_index;          /* tunnel partner IP lookup here */
+  /* The FIB index for src/dst addresses */
+  u32 encap_fib_index;
 
-  /* vnet intfc hw/sw_if_index */
-  u16 hw_if_index;
+  /* vnet intfc index */
   u32 sw_if_index;
+  u32 hw_if_index;
 
-  union { /* storage for the hash key */
-    vxlan4_tunnel_key_t *key4;
-    vxlan6_tunnel_key_t *key6;
-  };
+  /**
+   * Linkage into the FIB object graph
+   */
+  fib_node_t node;
 
-  /* flags */
-  u32 flags;
-} vxlan_tunnel_t;
+  /* The FIB entry sourced by the tunnel for its destination prefix */
+  fib_node_index_t fib_entry_index;
 
-/* Flags for vxlan_tunnel_t.flags */
-#define VXLAN_TUNNEL_IS_IPV4   1
+  /**
+   * The tunnel is a child of the FIB entry for its destination. This is
+   * so it receives updates when the forwarding information for that entry
+   * changes.
+   * The tunnel's sibling index on the FIB entry's dependency list.
+   */
+  u32 sibling_index;
+} vxlan_tunnel_t;
 
 #define foreach_vxlan_input_next        \
 _(DROP, "error-drop")                   \
-_(L2_INPUT, "l2-input")                 \
-_(IP4_INPUT, "ip4-input")               \
-_(IP6_INPUT, "ip6-input")
+_(L2_INPUT, "l2-input")
 
 typedef enum {
 #define _(s,n) VXLAN_INPUT_NEXT_##s,
@@ -139,7 +149,8 @@ vxlan_main_t vxlan_main;
 
 extern vlib_node_registration_t vxlan4_input_node;
 extern vlib_node_registration_t vxlan6_input_node;
-extern vlib_node_registration_t vxlan_encap_node;
+extern vlib_node_registration_t vxlan4_encap_node;
+extern vlib_node_registration_t vxlan6_encap_node;
 
 u8 * format_vxlan_encap_trace (u8 * s, va_list * args);
 
index 3ead986..17f9059 100644 (file)
@@ -14,3 +14,4 @@
  */
 vxlan_error (DECAPSULATED, "good packets decapsulated")
 vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets")
+vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header")
index 8a9a3b8..5f93a36 100644 (file)
  */
 
 typedef struct {
-  u32 flags;
+  u8 flags;
+  u8 res1;
+  u8 res2;
+  u8 res3;
   u32 vni_reserved;
 } vxlan_header_t;
 
-#define VXLAN_FLAGS_VALID_HOST_BYTE_ORDER (1<<27)
-#define VXLAN_FLAGS_VALID_NET_BYTE_ORDER (clib_host_to_net_u32(1<<27))
+#define VXLAN_FLAGS_I 0x08
 
 static inline u32 vnet_get_vni (vxlan_header_t * h)
 {
@@ -60,7 +62,8 @@ static inline u32 vnet_get_vni (vxlan_header_t * h)
 static inline void vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni)
 {
   h->vni_reserved = clib_host_to_net_u32 (vni<<8);
-  h->flags = VXLAN_FLAGS_VALID_NET_BYTE_ORDER;
+  * (u32 *) h = 0;
+  h->flags = VXLAN_FLAGS_I;
 }
 
 #endif /* __included_vxlan_packet_h__ */
index fb3b2d3..08c1079 100644 (file)
@@ -10066,13 +10066,7 @@ static uword unformat_vxlan_decap_next
   u32 *result = va_arg (*args, u32 *);
   u32 tmp;
 
-  if (unformat (input, "drop"))
-    *result = VXLAN_INPUT_NEXT_DROP;
-  else if (unformat (input, "ip4"))
-    *result = VXLAN_INPUT_NEXT_IP4_INPUT;
-  else if (unformat (input, "ip6"))
-    *result = VXLAN_INPUT_NEXT_IP6_INPUT;
-  else if (unformat (input, "l2"))
+  if (unformat (input, "l2"))
     *result = VXLAN_INPUT_NEXT_L2_INPUT;
   else if (unformat (input, "%d", &tmp))
     *result = tmp;
index 6e0f27e..e606673 100644 (file)
@@ -4925,7 +4925,7 @@ static void send_vxlan_tunnel_details
   vl_api_vxlan_tunnel_details_t *rmp;
   ip4_main_t *im4 = &ip4_main;
   ip6_main_t *im6 = &ip6_main;
-  u8 is_ipv6 = !(t->flags & VXLAN_TUNNEL_IS_IPV4);
+  u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst);
 
   rmp = vl_msg_api_alloc (sizeof (*rmp));
   memset (rmp, 0, sizeof (*rmp));
@@ -4943,7 +4943,8 @@ static void send_vxlan_tunnel_details
       rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
     }
   rmp->vni = htonl (t->vni);
-  rmp->decap_next_index = htonl (t->decap_next_index);
+  /* decap_next_index is deprecated, hard code to l2-input */
+  rmp->decap_next_index = htonl (VXLAN_INPUT_NEXT_L2_INPUT);
   rmp->sw_if_index = htonl (t->sw_if_index);
   rmp->is_ipv6 = is_ipv6;
   rmp->context = context;