gtpu: support non-G-PDU packets and PDU Session
[vpp.git] / src / plugins / gtpu / gtpu_encap.c
index 8ad53c5..2c3c46a 100644 (file)
@@ -48,23 +48,6 @@ typedef enum {
     GTPU_ENCAP_N_NEXT,
 } gtpu_encap_next_t;
 
-typedef struct {
-  u32 tunnel_index;
-  u32 teid;
-} gtpu_encap_trace_t;
-
-u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
-{
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  gtpu_encap_trace_t * t
-      = va_arg (*args, gtpu_encap_trace_t *);
-
-  s = format (s, "GTPU encap to gtpu_tunnel%d teid %d",
-             t->tunnel_index, t->teid);
-  return s;
-}
-
 
 #define foreach_fixed_header4_offset            \
     _(0) _(1) _(2) _(3)
@@ -216,7 +199,8 @@ gtpu_encap_inline (vlib_main_t * vm,
              copy_dst3 = (u64 *) ip4_3;
              copy_src3 = (u64 *) t3->rewrite;
 
-             /* Copy first 32 octets 8-bytes at a time */
+             /* Copy first 32 octets 8-bytes at a time (minimum size)
+              * TODO: check if clib_memcpy_fast is better */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
              foreach_fixed_header4_offset;
 #undef _
@@ -229,19 +213,83 @@ gtpu_encap_inline (vlib_main_t * vm,
 #define _(offs) copy_dst3[offs] = copy_src3[offs];
              foreach_fixed_header4_offset;
 #undef _
-             /* Last 4 octets. Hopefully gcc will be our friend */
-              copy_dst_last0 = (u32 *)(&copy_dst0[4]);
-              copy_src_last0 = (u32 *)(&copy_src0[4]);
-              copy_dst_last0[0] = copy_src_last0[0];
-              copy_dst_last1 = (u32 *)(&copy_dst1[4]);
-              copy_src_last1 = (u32 *)(&copy_src1[4]);
-              copy_dst_last1[0] = copy_src_last1[0];
-              copy_dst_last2 = (u32 *)(&copy_dst2[4]);
-              copy_src_last2 = (u32 *)(&copy_src2[4]);
-              copy_dst_last2[0] = copy_src_last2[0];
-              copy_dst_last3 = (u32 *)(&copy_dst3[4]);
-              copy_src_last3 = (u32 *)(&copy_src3[4]);
-              copy_dst_last3[0] = copy_src_last3[0];
+
+             /* Copy last octets */
+             if (_vec_len (t0->rewrite) == 36)
+               {
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+                 copy_src_last0 = (u32 *) (&copy_src0[4]);
+                 copy_dst_last0[0] = copy_src_last0[0];
+               }
+             else
+               {
+                 /* Next 8 octets (bytes 32-39 of the rewrite). */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+                 _ (4);
+#undef _
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+                 copy_src_last0 = (u32 *) (&copy_src0[5]);
+                 copy_dst_last0[0] = copy_src_last0[0];
+               }
+
+             if (_vec_len (t1->rewrite) == 36)
+               {
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last1 = (u32 *) (&copy_dst1[4]);
+                 copy_src_last1 = (u32 *) (&copy_src1[4]);
+                 copy_dst_last1[0] = copy_src_last1[0];
+               }
+             else
+               {
+                 /* Next 8 octets (bytes 32-39 of the rewrite). */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+                 _ (4);
+#undef _
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last1 = (u32 *) (&copy_dst1[5]);
+                 copy_src_last1 = (u32 *) (&copy_src1[5]);
+                 copy_dst_last1[0] = copy_src_last1[0];
+               }
+
+             if (_vec_len (t2->rewrite) == 36)
+               {
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last2 = (u32 *) (&copy_dst2[4]);
+                 copy_src_last2 = (u32 *) (&copy_src2[4]);
+                 copy_dst_last2[0] = copy_src_last2[0];
+               }
+             else
+               {
+                 /* Next 8 octets (bytes 32-39 of the rewrite). */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+                 _ (4);
+#undef _
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last2 = (u32 *) (&copy_dst2[5]);
+                 copy_src_last2 = (u32 *) (&copy_src2[5]);
+                 copy_dst_last2[0] = copy_src_last2[0];
+               }
+
+             if (_vec_len (t3->rewrite) == 36)
+               {
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last3 = (u32 *) (&copy_dst3[4]);
+                 copy_src_last3 = (u32 *) (&copy_src3[4]);
+                 copy_dst_last3[0] = copy_src_last3[0];
+               }
+             else
+               {
+                 /* Next 8 octets (bytes 32-39 of the rewrite). */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+                 _ (4);
+#undef _
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last3 = (u32 *) (&copy_dst3[5]);
+                 copy_src_last3 = (u32 *) (&copy_src3[5]);
+                 copy_dst_last3[0] = copy_src_last3[0];
+               }
 
              /* Fix the IP4 checksum and length */
              sum0 = ip4_0->checksum;
@@ -298,19 +346,23 @@ gtpu_encap_inline (vlib_main_t * vm,
              /* Fix GTPU length */
              gtpu0 = (gtpu_header_t *)(udp0+1);
              new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-                                            - sizeof (*ip4_0) - sizeof(*udp0));
+                                            - sizeof (*ip4_0) - sizeof(*udp0)
+                                            - GTPU_V1_HDR_LEN);
              gtpu0->length = new_l0;
              gtpu1 = (gtpu_header_t *)(udp1+1);
              new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
-                                            - sizeof (*ip4_1) - sizeof(*udp1));
+                                            - sizeof (*ip4_1) - sizeof(*udp1)
+                                            - GTPU_V1_HDR_LEN);
              gtpu1->length = new_l1;
              gtpu2 = (gtpu_header_t *)(udp2+1);
              new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
-                                            - sizeof (*ip4_2) - sizeof(*udp2));
+                                            - sizeof (*ip4_2) - sizeof(*udp2)
+                                            - GTPU_V1_HDR_LEN);
              gtpu2->length = new_l2;
-             gtpu3 = (gtpu_header_t *)(udp1+3);
+             gtpu3 = (gtpu_header_t *)(udp3+1);
              new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
-                                            - sizeof (*ip4_3) - sizeof(*udp3));
+                                            - sizeof (*ip4_3) - sizeof(*udp3)
+                                            - GTPU_V1_HDR_LEN);
              gtpu3->length = new_l3;
            }
          else /* ipv6 */
@@ -331,7 +383,7 @@ gtpu_encap_inline (vlib_main_t * vm,
              copy_src2 = (u64 *) t2->rewrite;
              copy_dst3 = (u64 *) ip6_3;
              copy_src3 = (u64 *) t3->rewrite;
-             /* Copy first 56 (ip6) octets 8-bytes at a time */
+             /* Copy first 56 (ip6) octets 8-bytes at a time (minimum size) */
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
              foreach_fixed_header6_offset;
 #undef _
@@ -344,6 +396,40 @@ gtpu_encap_inline (vlib_main_t * vm,
 #define _(offs) copy_dst3[offs] = copy_src3[offs];
              foreach_fixed_header6_offset;
 #undef _
+
+             /* Copy last octets */
+             if (_vec_len (t0->rewrite) == 64)
+               {
+                 /* Last 8 octets.  */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+                 _ (7);
+#undef _
+               }
+
+             if (_vec_len (t1->rewrite) == 64)
+               {
+                 /* Last 8 octets.  */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+                 _ (7);
+#undef _
+               }
+
+             if (_vec_len (t2->rewrite) == 64)
+               {
+                 /* Last 8 octets.  */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+                 _ (7);
+#undef _
+               }
+
+             if (_vec_len (t3->rewrite) == 64)
+               {
+                 /* Last 8 octets.  */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+                 _ (7);
+#undef _
+               }
+
              /* Fix IP6 payload length */
              new_l0 =
                 clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -376,13 +462,35 @@ gtpu_encap_inline (vlib_main_t * vm,
              udp3->length = new_l3;
              udp3->src_port = flow_hash3;
 
+             /* Fix GTPU length */
+             gtpu0 = (gtpu_header_t *)(udp0+1);
+             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
+                                            - sizeof (*ip6_0) - sizeof(*udp0)
+                                            - GTPU_V1_HDR_LEN);
+             gtpu0->length = new_l0;
+             gtpu1 = (gtpu_header_t *)(udp1+1);
+             new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
+                                            - sizeof (*ip6_1) - sizeof(*udp1)
+                                            - GTPU_V1_HDR_LEN);
+             gtpu1->length = new_l1;
+             gtpu2 = (gtpu_header_t *)(udp2+1);
+             new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
+                                            - sizeof (*ip6_2) - sizeof(*udp2)
+                                            - GTPU_V1_HDR_LEN);
+             gtpu2->length = new_l2;
+             gtpu3 = (gtpu_header_t *)(udp3+1);
+             new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
+                                            - sizeof (*ip6_3) - sizeof(*udp3)
+                                            - GTPU_V1_HDR_LEN);
+             gtpu3->length = new_l3;
+
              /* IPv6 UDP checksum is mandatory */
              udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
                                                                 ip6_0, &bogus);
              if (udp0->checksum == 0)
                udp0->checksum = 0xffff;
              udp1->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b1,
-                                                        ip6_1, &bogus);
+                                                                ip6_1, &bogus);
              if (udp1->checksum == 0)
                udp1->checksum = 0xffff;
              udp2->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b2,
@@ -390,27 +498,10 @@ gtpu_encap_inline (vlib_main_t * vm,
              if (udp2->checksum == 0)
                udp2->checksum = 0xffff;
              udp3->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b3,
-                                                        ip6_3, &bogus);
+                                                                ip6_3, &bogus);
              if (udp3->checksum == 0)
                udp3->checksum = 0xffff;
 
-             /* Fix GTPU length */
-             gtpu0 = (gtpu_header_t *)(udp0+1);
-             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-                                            - sizeof (*ip4_0) - sizeof(*udp0));
-             gtpu0->length = new_l0;
-             gtpu1 = (gtpu_header_t *)(udp1+1);
-             new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b1)
-                                            - sizeof (*ip4_1) - sizeof(*udp1));
-             gtpu1->length = new_l1;
-             gtpu2 = (gtpu_header_t *)(udp2+1);
-             new_l2 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b2)
-                                            - sizeof (*ip4_2) - sizeof(*udp2));
-             gtpu2->length = new_l2;
-             gtpu3 = (gtpu_header_t *)(udp3+1);
-             new_l3 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b3)
-                                            - sizeof (*ip4_3) - sizeof(*udp3));
-             gtpu3->length = new_l3;
            }
 
           pkts_encapsulated += 4;
@@ -421,6 +512,12 @@ gtpu_encap_inline (vlib_main_t * vm,
          stats_n_packets += 4;
          stats_n_bytes += len0 + len1 + len2 + len3;
 
+          /* save inner packet flow_hash for load-balance node */
+          vnet_buffer (b0)->ip.flow_hash = flow_hash0;
+          vnet_buffer (b1)->ip.flow_hash = flow_hash1;
+          vnet_buffer (b2)->ip.flow_hash = flow_hash2;
+          vnet_buffer (b3)->ip.flow_hash = flow_hash3;
+
          /* Batch stats increment on the same gtpu tunnel so counter is not
             incremented per packet. Note stats are still incremented for deleted
             and admin-down tunnel where packets are dropped. It is not worthwhile
@@ -467,16 +564,40 @@ gtpu_encap_inline (vlib_main_t * vm,
               gtpu_encap_trace_t *tr =
                 vlib_add_trace (vm, node, b0, sizeof (*tr));
               tr->tunnel_index = t0 - gtm->tunnels;
-              tr->teid = t0->teid;
-           }
+              tr->tteid = t0->tteid;
+             tr->pdu_extension = t0->pdu_extension;
+             tr->qfi = t0->qfi;
+           }
+
+         if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             gtpu_encap_trace_t *tr =
+               vlib_add_trace (vm, node, b1, sizeof (*tr));
+             tr->tunnel_index = t1 - gtm->tunnels;
+             tr->tteid = t1->tteid;
+             tr->pdu_extension = t1->pdu_extension;
+             tr->qfi = t1->qfi;
+           }
 
-          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+         if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
             {
               gtpu_encap_trace_t *tr =
-                vlib_add_trace (vm, node, b1, sizeof (*tr));
-              tr->tunnel_index = t1 - gtm->tunnels;
-              tr->teid = t1->teid;
-            }
+                vlib_add_trace (vm, node, b2, sizeof (*tr));
+              tr->tunnel_index = t2 - gtm->tunnels;
+              tr->tteid = t2->tteid;
+             tr->pdu_extension = t2->pdu_extension;
+             tr->qfi = t2->qfi;
+           }
+
+         if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             gtpu_encap_trace_t *tr =
+               vlib_add_trace (vm, node, b3, sizeof (*tr));
+             tr->tunnel_index = t3 - gtm->tunnels;
+             tr->tteid = t3->tteid;
+             tr->pdu_extension = t3->pdu_extension;
+             tr->qfi = t3->qfi;
+           }
 
          vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
                                           to_next, n_left_to_next,
@@ -518,8 +639,9 @@ gtpu_encap_inline (vlib_main_t * vm,
          next0 = t0->next_dpo.dpoi_next_node;
          vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
 
-          /* Apply the rewrite string. $$$$ vnet_rewrite? */
-          vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+         /* Apply the rewrite string. $$$$ vnet_rewrite.
+          * The correct total size is set in ip_udp_gtpu_rewrite() */
+         vlib_buffer_advance (b0, -(word) _vec_len (t0->rewrite));
 
          if (is_ip4)
            {
@@ -532,10 +654,26 @@ gtpu_encap_inline (vlib_main_t * vm,
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
              foreach_fixed_header4_offset;
 #undef _
-             /* Last 4 octets. Hopefully gcc will be our friend */
-              copy_dst_last0 = (u32 *)(&copy_dst0[4]);
-              copy_src_last0 = (u32 *)(&copy_src0[4]);
-              copy_dst_last0[0] = copy_src_last0[0];
+
+             /* Copy last octets */
+             if (_vec_len (t0->rewrite) == 36)
+               {
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+                 copy_src_last0 = (u32 *) (&copy_src0[4]);
+                 copy_dst_last0[0] = copy_src_last0[0];
+               }
+             else
+               {
+                 /* Next 8 octets (bytes 32-39 of the rewrite). */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+                 _ (4);
+#undef _
+                 /* Last 4 octets. Hopefully gcc will be our friend */
+                 copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+                 copy_src_last0 = (u32 *) (&copy_src0[5]);
+                 copy_dst_last0[0] = copy_src_last0[0];
+               }
 
              /* Fix the IP4 checksum and length */
              sum0 = ip4_0->checksum;
@@ -556,7 +694,8 @@ gtpu_encap_inline (vlib_main_t * vm,
              /* Fix GTPU length */
              gtpu0 = (gtpu_header_t *)(udp0+1);
              new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-                                            - sizeof (*ip4_0) - sizeof(*udp0));
+                                            - sizeof (*ip4_0) - sizeof(*udp0)
+                                            - GTPU_V1_HDR_LEN);
              gtpu0->length = new_l0;
            }
 
@@ -572,6 +711,16 @@ gtpu_encap_inline (vlib_main_t * vm,
 #define _(offs) copy_dst0[offs] = copy_src0[offs];
              foreach_fixed_header6_offset;
 #undef _
+
+             /* Copy last octets */
+             if (_vec_len (t0->rewrite) == 64)
+               {
+                 /* Last 8 octets.  */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+                 _ (7);
+#undef _
+               }
+
              /* Fix IP6 payload length */
              new_l0 =
                 clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -583,17 +732,18 @@ gtpu_encap_inline (vlib_main_t * vm,
              udp0->length = new_l0;
              udp0->src_port = flow_hash0;
 
+             /* Fix GTPU length */
+             gtpu0 = (gtpu_header_t *)(udp0+1);
+             new_l0 = clib_host_to_net_u16 (
+               vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip6_0) -
+               sizeof (*udp0) - GTPU_V1_HDR_LEN);
+             gtpu0->length = new_l0;
+
              /* IPv6 UDP checksum is mandatory */
              udp0->checksum = ip6_tcp_udp_icmp_compute_checksum(vm, b0,
                                                                 ip6_0, &bogus);
              if (udp0->checksum == 0)
                udp0->checksum = 0xffff;
-
-             /* Fix GTPU length */
-             gtpu0 = (gtpu_header_t *)(udp0+1);
-             new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
-                                            - sizeof (*ip4_0) - sizeof(*udp0));
-             gtpu0->length = new_l0;
            }
 
           pkts_encapsulated ++;
@@ -601,6 +751,9 @@ gtpu_encap_inline (vlib_main_t * vm,
          stats_n_packets += 1;
          stats_n_bytes += len0;
 
+          /* save inner packet flow_hash for load-balance node */
+          vnet_buffer (b0)->ip.flow_hash = flow_hash0;
+
          /* Batch stats increment on the same gtpu tunnel so counter is not
             incremented per packet. Note stats are still incremented for deleted
             and admin-down tunnel where packets are dropped. It is not worthwhile
@@ -624,8 +777,10 @@ gtpu_encap_inline (vlib_main_t * vm,
               gtpu_encap_trace_t *tr =
                 vlib_add_trace (vm, node, b0, sizeof (*tr));
               tr->tunnel_index = t0 - gtm->tunnels;
-              tr->teid = t0->teid;
-            }
+              tr->tteid = t0->tteid;
+             tr->pdu_extension = t0->pdu_extension;
+             tr->qfi = t0->qfi;
+           }
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
@@ -651,16 +806,14 @@ gtpu_encap_inline (vlib_main_t * vm,
   return from_frame->n_vectors;
 }
 
-static uword
-gtpu4_encap (vlib_main_t * vm,
+VLIB_NODE_FN (gtpu4_encap_node) (vlib_main_t * vm,
              vlib_node_runtime_t * node,
              vlib_frame_t * from_frame)
 {
   return gtpu_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
 }
 
-static uword
-gtpu6_encap (vlib_main_t * vm,
+VLIB_NODE_FN (gtpu6_encap_node) (vlib_main_t * vm,
              vlib_node_runtime_t * node,
              vlib_frame_t * from_frame)
 {
@@ -668,7 +821,6 @@ gtpu6_encap (vlib_main_t * vm,
 }
 
 VLIB_REGISTER_NODE (gtpu4_encap_node) = {
-  .function = gtpu4_encap,
   .name = "gtpu4-encap",
   .vector_size = sizeof (u32),
   .format_trace = format_gtpu_encap_trace,
@@ -683,10 +835,7 @@ VLIB_REGISTER_NODE (gtpu4_encap_node) = {
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (gtpu4_encap_node, gtpu4_encap)
-
 VLIB_REGISTER_NODE (gtpu6_encap_node) = {
-  .function = gtpu6_encap,
   .name = "gtpu6-encap",
   .vector_size = sizeof (u32),
   .format_trace = format_gtpu_encap_trace,
@@ -701,5 +850,3 @@ VLIB_REGISTER_NODE (gtpu6_encap_node) = {
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (gtpu6_encap_node, gtpu6_encap)
-