VXLAN tunnel encap forwarding optimization with FIB 2.0
[vpp.git] / vnet / vnet / vxlan / encap.c
index e7d49b0..5b63064 100644 (file)
@@ -37,8 +37,6 @@ typedef enum {
 } vxlan_encap_error_t;
 
 typedef enum {
-    VXLAN_ENCAP_NEXT_IP4_LOOKUP,
-    VXLAN_ENCAP_NEXT_IP6_LOOKUP,
     VXLAN_ENCAP_NEXT_DROP,
     VXLAN_ENCAP_N_NEXT,
 } vxlan_encap_next_t;
@@ -55,7 +53,8 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args)
   vxlan_encap_trace_t * t 
       = va_arg (*args, vxlan_encap_trace_t *);
 
-  s = format (s, "VXLAN-ENCAP: tunnel %d vni %d", t->tunnel_index, t->vni);
+  s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d", 
+             t->tunnel_index, t->vni);
   return s;
 }
 
@@ -66,10 +65,11 @@ u8 * format_vxlan_encap_trace (u8 * s, va_list * args)
 #define foreach_fixed_header6_offset            \
     _(0) _(1) _(2) _(3) _(4) _(5) _(6)
 
-static uword
-vxlan_encap (vlib_main_t * vm,
-               vlib_node_runtime_t * node,
-               vlib_frame_t * from_frame)
+always_inline uword
+vxlan_encap_inline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame,
+                   u32 is_ip4)
 {
   u32 n_left_from, next_index, * from, * to_next;
   vxlan_main_t * vxm = &vxlan_main;
@@ -79,6 +79,10 @@ vxlan_encap (vlib_main_t * vm,
   u16 old_l0 = 0, old_l1 = 0;
   u32 cpu_index = os_get_cpu_number();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+  u32 sw_if_index0 = 0, sw_if_index1 = 0;
+  u32 next0 = 0, next1 = 0;
+  vnet_hw_interface_t * hi0, * hi1;
+  vxlan_tunnel_t * t0 = NULL, * t1 = NULL;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -99,10 +103,7 @@ vxlan_encap (vlib_main_t * vm,
           u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
           u32 flow_hash0, flow_hash1;
-         u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-          u32 next1 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-         u32 sw_if_index0, sw_if_index1, len0, len1;
-          vnet_hw_interface_t * hi0, * hi1;
+         u32 len0, len1;
           ip4_header_t * ip4_0, * ip4_1;
           ip6_header_t * ip6_0, * ip6_1;
           udp_header_t * udp0, * udp1;
@@ -110,10 +111,8 @@ vxlan_encap (vlib_main_t * vm,
           u64 * copy_src1, * copy_dst1;
           u32 * copy_src_last0, * copy_dst_last0;
           u32 * copy_src_last1, * copy_dst_last1;
-          vxlan_tunnel_t * t0, * t1;
           u16 new_l0, new_l1;
           ip_csum_t sum0, sum1;
-          u8 is_ip4_0, is_ip4_1;
 
          /* Prefetch next iteration. */
          {
@@ -144,169 +143,147 @@ vxlan_encap (vlib_main_t * vm,
           flow_hash0 = vnet_l2_compute_flow_hash (b0);
           flow_hash1 = vnet_l2_compute_flow_hash (b1);
 
-          /* 1-wide cache? */
-         sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-         sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
-          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
-         hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); 
-
-          t0 = &vxm->tunnels[hi0->dev_instance];
-          t1 = &vxm->tunnels[hi1->dev_instance];
-
-          is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4);
-          is_ip4_1 = (t1->flags & VXLAN_TUNNEL_IS_IPV4);
-
-          if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
-          if (PREDICT_FALSE(!is_ip4_1)) next1 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+             hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+             t0 = &vxm->tunnels[hi0->dev_instance];
+             /* Note: change to always set next0 if it may be set to drop */
+             next0 = t0->next_dpo.dpoi_next_node;
+           }
+          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
 
-         /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octects */
-          /* IP6 VXLAN header sizeof(ip6_vxlan_header_t) should be 56 octects */
-         if (PREDICT_TRUE(is_ip4_0))
-            ASSERT(vec_len(t0->rewrite) == 36);
-          else
-            ASSERT(vec_len(t0->rewrite) == 56);
-          if (PREDICT_TRUE(is_ip4_1))
-            ASSERT(vec_len(t1->rewrite) == 36);
-          else
-            ASSERT(vec_len(t1->rewrite) == 56);
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index1 != vnet_buffer(b1)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+             hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); 
+             t1 = &vxm->tunnels[hi1->dev_instance];
+             /* Note: change to always set next1 if it may be set to drop */
+             next1 = t1->next_dpo.dpoi_next_node;
+           }
+          vnet_buffer(b1)->ip.adj_index[VLIB_TX] = t1->next_dpo.dpoi_index;
 
           /* Apply the rewrite string. $$$$ vnet_rewrite? */
           vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
           vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
 
-          /* assign both v4 and v6; avoid a branch, optimizer will help us */
-          ip4_0 = vlib_buffer_get_current(b0);
-          ip6_0 = (void *)ip4_0;
-          ip4_1 = vlib_buffer_get_current(b1);
-          ip6_1 = (void *)ip4_1;
-
-          /* Copy the fixed header (v4 and v6 variables point to the same
-           * place at this point)
-           */
-          copy_dst0 = (u64 *) ip4_0;
-          copy_src0 = (u64 *) t0->rewrite;
-
-          copy_dst1 = (u64 *) ip4_1;
-          copy_src1 = (u64 *) t1->rewrite;
-
-          /* Copy first 32 (ip4)/56 (ip6) octets 8-bytes at a time */
+         if (is_ip4)
+           {
+             /* IP4 VXLAN header should be 36 octects */
+              ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+             ASSERT(vec_len(t1->rewrite) == sizeof(ip4_vxlan_header_t));
+
+             ip4_0 = vlib_buffer_get_current(b0);
+             ip4_1 = vlib_buffer_get_current(b1);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip4_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             copy_dst1 = (u64 *) ip4_1;
+             copy_src1 = (u64 *) t1->rewrite;
+             /* Copy first 32 octets 8-bytes at a time */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
-          if (PREDICT_TRUE(is_ip4_0)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header4_offset;
 #undef _
 #define _(offs) copy_dst1[offs] = copy_src1[offs];
-          if (PREDICT_TRUE(is_ip4_1)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header4_offset;
 #undef _
-          /* Last 4 octets. Hopefully gcc will be our friend */
-          if (PREDICT_TRUE(is_ip4_0)) {
+             /* Last 4 octets. Hopefully gcc will be our friend */
               copy_dst_last0 = (u32 *)(&copy_dst0[4]);
               copy_src_last0 = (u32 *)(&copy_src0[4]);
               copy_dst_last0[0] = copy_src_last0[0];
-          }
-          if (PREDICT_TRUE(is_ip4_1)) {
               copy_dst_last1 = (u32 *)(&copy_dst1[4]);
               copy_src_last1 = (u32 *)(&copy_src1[4]);
               copy_dst_last1[0] = copy_src_last1[0];
-          }
 
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum0 = ip4_0->checksum;
-
-            /* old_l0 always 0, see the rewrite setup */
-            new_l0 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+             /* Fix the IP4 checksum and length */
+             sum0 = ip4_0->checksum;
+             new_l0 = /* old_l0 always 0, see the rewrite setup */ 
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
               sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
-                                   length /* changed member */);
-            ip4_0->checksum = ip_csum_fold (sum0);
-            ip4_0->length = new_l0;
-          } else {
-            new_l0 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof(*ip6_0));
-            ip6_0->payload_length = new_l0;
-          }
-
-          if (PREDICT_TRUE(is_ip4_1)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum1 = ip4_1->checksum;
-
-            /* old_l1 always 0, see the rewrite setup */
-            new_l1 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+                                    length /* changed member */);
+             ip4_0->checksum = ip_csum_fold (sum0);
+             ip4_0->length = new_l0;
+             sum1 = ip4_1->checksum;
+             new_l1 = /* old_l1 always 0, see the rewrite setup */
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
               sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
-                                   length /* changed member */);
-            ip4_1->checksum = ip_csum_fold (sum1);
-            ip4_1->length = new_l1;
-          } else {
-            new_l1 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof(*ip6_1));
-            ip6_1->payload_length = new_l1;
-          }
-          
-          /* Fix UDP length */
-          if (PREDICT_TRUE(is_ip4_0)) {
-            udp0 = (udp_header_t *)(ip4_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip4_0));
-          } else {
-            udp0 = (udp_header_t *)(ip6_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip6_0));
-          }
-          if (PREDICT_TRUE(is_ip4_1)) {
-            udp1 = (udp_header_t *)(ip4_1+1);
-            new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof (*ip4_1));
-          } else {
-            udp1 = (udp_header_t *)(ip6_1+1);
-            new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
-                                           - sizeof (*ip6_1));
-          }
-          
-          udp0->length = new_l0;
-          udp0->src_port = flow_hash0;
-
-          udp1->length = new_l1;
-          udp1->src_port = flow_hash1;
-
-          if (PREDICT_FALSE(!is_ip4_0)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
-                                                        ip6_0, &bogus);
-                ASSERT(bogus == 0);
-                if (udp0->checksum == 0)
-                    udp0->checksum = 0xffff;
-          }
-
-          if (PREDICT_FALSE(!is_ip4_1)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
+                                    length /* changed member */);
+             ip4_1->checksum = ip_csum_fold (sum1);
+             ip4_1->length = new_l1;
+
+             /* Fix UDP length and set source port */
+             udp0 = (udp_header_t *)(ip4_0+1);
+             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+                                            - sizeof (*ip4_0));
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+             udp1 = (udp_header_t *)(ip4_1+1);
+             new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+                                            - sizeof (*ip4_1));
+             udp1->length = new_l1;
+             udp1->src_port = flow_hash1;
+           }
+         else /* ipv6 */
+           {
+              int bogus = 0;
+
+             /* IP6 VXLAN header should be 56 octects */
+              ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+             ASSERT(vec_len(t1->rewrite) == sizeof(ip6_vxlan_header_t));
+             ip6_0 = vlib_buffer_get_current(b0);
+             ip6_1 = vlib_buffer_get_current(b1);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip6_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             copy_dst1 = (u64 *) ip6_1;
+             copy_src1 = (u64 *) t1->rewrite;
+             /* Copy first 56 (ip6) octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+             foreach_fixed_header6_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+             foreach_fixed_header6_offset;
+#undef _
+             /* Fix IP6 payload length */
+             new_l0 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+                                     - sizeof(*ip6_0));
+             ip6_0->payload_length = new_l0;
+             new_l1 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+                                     - sizeof(*ip6_1));
+             ip6_1->payload_length = new_l1;
+
+             /* Fix UDP length  and set source port */
+             udp0 = (udp_header_t *)(ip6_0+1);
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+             udp1 = (udp_header_t *)(ip6_1+1);
+             udp1->length = new_l1;
+             udp1->src_port = flow_hash1;
+
+             /* IPv6 UDP checksum is mandatory */
+             udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+                                                                ip6_0, &bogus);
+             ASSERT(bogus == 0);
+             if (udp0->checksum == 0)
+               udp0->checksum = 0xffff;
+             udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
                                                         ip6_1, &bogus);
-                ASSERT(bogus == 0);
-                if (udp1->checksum == 0)
-                    udp1->checksum = 0xffff;
-          }
-
-          /* Reset to look up tunnel partner in the configured FIB */
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
-          vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
-          vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
-          pkts_encapsulated += 2;
+             ASSERT(bogus == 0);
+             if (udp1->checksum == 0)
+               udp1->checksum = 0xffff;
+           }
 
+          pkts_encapsulated += 2;
          len0 = vlib_buffer_length_in_chain (vm, b0);
-         len1 = vlib_buffer_length_in_chain (vm, b0);
+         len1 = vlib_buffer_length_in_chain (vm, b1);
          stats_n_packets += 2;
          stats_n_bytes += len0 + len1;
 
@@ -367,18 +344,14 @@ vxlan_encap (vlib_main_t * vm,
          u32 bi0;
          vlib_buffer_t * b0;
           u32 flow_hash0;
-         u32 next0 = VXLAN_ENCAP_NEXT_IP4_LOOKUP;
-         u32 sw_if_index0, len0;
-          vnet_hw_interface_t * hi0;
+         u32 len0;
           ip4_header_t * ip4_0;
           ip6_header_t * ip6_0;
           udp_header_t * udp0;
           u64 * copy_src0, * copy_dst0;
           u32 * copy_src_last0, * copy_dst_last0;
-          vxlan_tunnel_t * t0;
           u16 new_l0;
           ip_csum_t sum0;
-          u8 is_ip4_0;
 
          bi0 = from[0];
          to_next[0] = bi0;
@@ -391,102 +364,91 @@ vxlan_encap (vlib_main_t * vm,
 
           flow_hash0 = vnet_l2_compute_flow_hash(b0);
 
-          /* 1-wide cache? */
-         sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
-          hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
-
-          t0 = &vxm->tunnels[hi0->dev_instance];
-
-          is_ip4_0 = (t0->flags & VXLAN_TUNNEL_IS_IPV4);
-
-          if (PREDICT_FALSE(!is_ip4_0)) next0 = VXLAN_ENCAP_NEXT_IP6_LOOKUP;
-
-         /* IP4 VXLAN header sizeof(ip4_vxlan_header_t) should be 36 octets */
-          /* IP6 VXLAN header sizeof(ip4_vxlan_header_t) should be 56 octets */
-         if (PREDICT_TRUE(is_ip4_0))
-            ASSERT(vec_len(t0->rewrite) == 36);
-          else
-            ASSERT(vec_len(t0->rewrite) == 56);
+         /* Get next node index and adj index from tunnel next_dpo */
+         if (sw_if_index0 != vnet_buffer(b0)->sw_if_index[VLIB_TX])
+           {
+             sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+             hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+             t0 = &vxm->tunnels[hi0->dev_instance];
+             /* Note: change to always set next0 if it may be set to drop */
+             next0 = t0->next_dpo.dpoi_next_node;
+           }
+         vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
 
           /* Apply the rewrite string. $$$$ vnet_rewrite? */
           vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
 
-          /* assign both v4 and v6; avoid a branch, optimizer will help us */
-          ip4_0 = vlib_buffer_get_current(b0);
-          ip6_0 = (void *)ip4_0;
+         if (is_ip4)
+           {
+             /* IP4 VXLAN header should be 36 octects */
+              ASSERT(sizeof(ip4_vxlan_header_t) == 36);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip4_vxlan_header_t));
+             ip4_0 = vlib_buffer_get_current(b0);
+
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip4_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             /* Copy first 32 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+             foreach_fixed_header4_offset;
+#undef _
+             /* Last 4 octets. Hopefully gcc will be our friend */
+              copy_dst_last0 = (u32 *)(&copy_dst0[4]);
+              copy_src_last0 = (u32 *)(&copy_src0[4]);
+              copy_dst_last0[0] = copy_src_last0[0];
 
-          /* Copy the fixed header (v4 and v6 variables point to the same
-           * place at this point)
-           */
-          copy_dst0 = (u64 *) ip4_0;
-          copy_src0 = (u64 *) t0->rewrite;
+             /* Fix the IP4 checksum and length */
+             sum0 = ip4_0->checksum;
+             new_l0 = /* old_l0 always 0, see the rewrite setup */ 
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+              sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+                                    length /* changed member */);
+             ip4_0->checksum = ip_csum_fold (sum0);
+             ip4_0->length = new_l0;
+
+             /* Fix UDP length and set source port */
+             udp0 = (udp_header_t *)(ip4_0+1);
+             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+                                            - sizeof (*ip4_0));
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+           }
 
-          /* Copy first 32 octets 8-bytes at a time */
+         else /* ip6 path */
+           {
+              int bogus = 0;
+
+             /* IP6 VXLAN header should be 56 octects */
+              ASSERT(sizeof(ip6_vxlan_header_t) == 56);
+              ASSERT(vec_len(t0->rewrite) == sizeof(ip6_vxlan_header_t));
+             ip6_0 = vlib_buffer_get_current(b0);
+             /* Copy the fixed header */
+             copy_dst0 = (u64 *) ip6_0;
+             copy_src0 = (u64 *) t0->rewrite;
+             /* Copy first 56 (ip6) octets 8-bytes at a time */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
-          if (PREDICT_TRUE(is_ip4_0)) {
-            foreach_fixed_header4_offset;
-          } else {
-            foreach_fixed_header6_offset;
-          }
+             foreach_fixed_header6_offset;
 #undef _
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* Last 4 octets. Hopefully gcc will be our friend */
-            copy_dst_last0 = (u32 *)(&copy_dst0[4]);
-            copy_src_last0 = (u32 *)(&copy_src0[4]);
-          
-            copy_dst_last0[0] = copy_src_last0[0];
-          }
-
-          if (PREDICT_TRUE(is_ip4_0)) {
-            /* fix the <bleep>ing outer-IP checksum */
-            sum0 = ip4_0->checksum;
-
-            /* old_l0 always 0, see the rewrite setup */
-            new_l0 = 
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
-              sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
-                                 length /* changed member */);
-            ip4_0->checksum = ip_csum_fold (sum0);
-            ip4_0->length = new_l0;
-          } else {
-            new_l0 =
-              clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof(*ip6_0));
-            ip6_0->payload_length = new_l0;
-          }
-          
-          /* Fix UDP length */
-          if (PREDICT_TRUE(is_ip4_0)) {
-            udp0 = (udp_header_t *)(ip4_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip4_0));
-          } else {
-            udp0 = (udp_header_t *)(ip6_0+1);
-            new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
-                                           - sizeof (*ip6_0));
-          }
-          
-          udp0->length = new_l0;
-          udp0->src_port = flow_hash0;
-
-          if (PREDICT_FALSE(!is_ip4_0)) {
-                int bogus = 0;
-                /* IPv6 UDP checksum is mandatory */
-                udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
-                                                        ip6_0, &bogus);
-                ASSERT(bogus == 0);
-                if (udp0->checksum == 0)
-                    udp0->checksum = 0xffff;
-          }
-
-
-          /* vnet_update_l2_len (b0);  do we need this? cluke */
-
-          /* Reset to look up tunnel partner in the configured FIB */
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
-          vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
-          pkts_encapsulated ++;
+             /* Fix IP6 payload length */
+             new_l0 =
+                clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+                                     - sizeof(*ip6_0));
+             ip6_0->payload_length = new_l0;
+
+             /* Fix UDP length  and set source port */
+             udp0 = (udp_header_t *)(ip6_0+1);
+             udp0->length = new_l0;
+             udp0->src_port = flow_hash0;
+
+             /* IPv6 UDP checksum is mandatory */
+             udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
+                                                                ip6_0, &bogus);
+             ASSERT(bogus == 0);
+             if (udp0->checksum == 0)
+               udp0->checksum = 0xffff;
+           }
 
+          pkts_encapsulated ++;
          len0 = vlib_buffer_length_in_chain (vm, b0);
          stats_n_packets += 1;
          stats_n_bytes += len0;
@@ -541,24 +503,51 @@ vxlan_encap (vlib_main_t * vm,
   return from_frame->n_vectors;
 }
 
-VLIB_REGISTER_NODE (vxlan_encap_node) = {
-  .function = vxlan_encap,
-  .name = "vxlan-encap",
+static uword
+vxlan4_encap (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+static uword
+vxlan6_encap (vlib_main_t * vm,
+             vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (vxlan4_encap_node) = {
+  .function = vxlan4_encap,
+  .name = "vxlan4-encap",
   .vector_size = sizeof (u32),
   .format_trace = format_vxlan_encap_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-
   .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
   .error_strings = vxlan_encap_error_strings,
-
   .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+  .next_nodes = {
+        [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+  },
+};
 
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_encap_node, vxlan4_encap)
+
+VLIB_REGISTER_NODE (vxlan6_encap_node) = {
+  .function = vxlan6_encap,
+  .name = "vxlan6-encap",
+  .vector_size = sizeof (u32),
+  .format_trace = format_vxlan_encap_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+  .error_strings = vxlan_encap_error_strings,
+  .n_next_nodes = VXLAN_ENCAP_N_NEXT,
   .next_nodes = {
-        [VXLAN_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
-        [VXLAN_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
         [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (vxlan_encap_node, vxlan_encap)
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_encap_node, vxlan6_encap)