VPP-547: Fix for co-existence of HbH and RH header in Segment routing: 50/2850/21
author Shwetha <shwethab@cisco.com>
Tue, 13 Sep 2016 10:51:00 +0000 (11:51 +0100)
committer Damjan Marion <dmarion.lists@gmail.com>
Fri, 9 Dec 2016 14:57:21 +0000 (14:57 +0000)
1. sr-rewrite - SR insertion in v6: the SR RH was inserted immediately after
the v6 header, but if an HbH header is present then, per RFC 2460, the HbH
header must immediately follow the v6 header. This is fixed.
2. sr-local: when a v6 packet destined to an SR segment is received with an
HbH header present, it is not handed over to sr-local for processing. Fixed
ip6-local handling to skip HbH, as there is no registered handler for HbH for now.
3. sr-replicate - updated the dual-packet loop of sr_rewrite to handle
replicate; fixes in sr-replicate to handle the presence of an HbH header.

Change-Id: I034523a42d2fedf97134761f956ab534babb8b36
Signed-off-by: Shwetha <shwethab@cisco.com>
vnet/vnet/ip/ip6_forward.c
vnet/vnet/ip/ip6_packet.h
vnet/vnet/sr/sr.c
vnet/vnet/sr/sr_replicate.c

index a4ce65a..6b74b7c 100644 (file)
@@ -1230,6 +1230,77 @@ u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
   return p0->flags;
 }
 
+/* ip6_locate_header
+ *
+ * Walk the IPv6 extension-header chain of ip0 looking for protocol find_hdr.
+ *   1. find_hdr < 0: stop at the first header ip6_ext_hdr() does not treat
+ *      as an extension header (or at No-Next-Header); return its protocol
+ *      number and store its byte offset from ip0 in *offset.
+ *   2. find_hdr >= 0 and that protocol is found: store its byte offset
+ *      from ip0 in *offset and return find_hdr.
+ *   3. Return -1, leaving *offset untouched, if find_hdr is not in the
+ *      chain, a header starts at/after the end of p0's current data, or a
+ *      non-first fragment is met.
+ */
+always_inline int ip6_locate_header (vlib_buffer_t *p0,
+                               ip6_header_t *ip0,
+                               int find_hdr,
+                               u32 *offset)
+{
+  u8 next_proto = ip0->protocol;
+  u8 *next_header;
+  u8 done = 0;
+  u32 cur_offset;
+  u8  *temp_nxthdr = 0;
+  u32 exthdr_len = 0;
+
+  next_header = ip6_next_header(ip0);
+  cur_offset = sizeof(ip6_header_t);
+  while(1)
+    {
+      done = (next_proto == find_hdr);
+      if (PREDICT_FALSE(next_header >= (u8 *)vlib_buffer_get_current(p0) + p0->current_length))
+       {
+         // Chain ran past the data (e.g. oversized ext hdr); NOTE(review): only the header start is checked
+         return(-1);
+       }
+      if (done)
+       break;
+      if ((!ip6_ext_hdr(next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+       {
+         if (find_hdr < 0)
+           break;
+         return -1;
+        }
+      if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+       {
+         ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *)next_header;
+         u16 frag_off = ip6_frag_hdr_offset(frag_hdr);
+          /* Non-first fragment: give up and return -1 */
+         if (frag_off)
+           return(-1);
+         exthdr_len = sizeof(ip6_frag_hdr_t);
+          temp_nxthdr = next_header + exthdr_len;
+       }
+      else if (next_proto == IP_PROTOCOL_IPSEC_AH)
+       {
+         exthdr_len = ip6_ext_authhdr_len(((ip6_ext_header_t *)next_header));
+         temp_nxthdr = next_header + exthdr_len;
+       }
+      else
+       {
+         exthdr_len = ip6_ext_header_len(((ip6_ext_header_t *)next_header));
+         temp_nxthdr = next_header + exthdr_len;
+       }
+       next_proto = ((ip6_ext_header_t *)next_header)->next_hdr;
+       next_header = temp_nxthdr;
+       cur_offset += exthdr_len;
+    }
+
+  *offset = cur_offset;
+  return(next_proto);
+}
+
 static uword
 ip6_local (vlib_main_t * vm,
           vlib_node_runtime_t * node,
@@ -1263,6 +1334,7 @@ ip6_local (vlib_main_t * vm,
          i32 len_diff0, len_diff1;
          u8 error0, type0, good_l4_checksum0;
          u8 error1, type1, good_l4_checksum1;
+         u32 udp_offset0, udp_offset1;
 
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
@@ -1288,26 +1360,48 @@ ip6_local (vlib_main_t * vm,
 
          good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
          good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+         len_diff0 = 0;
+         len_diff1 = 0;
 
-         udp0 = ip6_next_header (ip0);
-         udp1 = ip6_next_header (ip1);
-
-         /* Don't verify UDP checksum for packets with explicit zero checksum. */
-         good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
-         good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
+         /* Skip HBH local processing */
+          if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)ip6_next_header(ip0);
+             next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+             type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+           }
+          if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)ip6_next_header(ip1);
+             next1 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+             type1 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+           }
+         if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0,
+                                                          IP_PROTOCOL_UDP, &udp_offset0)))
+           {
+             udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0);
+             /* Don't verify UDP checksum for packets with explicit zero checksum. */
+             good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+             /* Verify UDP length. */
+             ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+             udp_len0 = clib_net_to_host_u16 (udp0->length);
+             len_diff0 = ip_len0 - udp_len0;
+           }
+         if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p1, ip1,
+                                                          IP_PROTOCOL_UDP, &udp_offset1)))
+           {
+             udp1 = (udp_header_t *)((u8 *)ip1 + udp_offset1);
+             /* Don't verify UDP checksum for packets with explicit zero checksum. */
+             good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
+             /* Verify UDP length. */
+             ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+             udp_len1 = clib_net_to_host_u16 (udp1->length);
+             len_diff1 = ip_len1 - udp_len1;
+           }
 
          good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
          good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
 
-         /* Verify UDP length. */
-         ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
-         ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
-         udp_len0 = clib_net_to_host_u16 (udp0->length);
-         udp_len1 = clib_net_to_host_u16 (udp1->length);
-
-         len_diff0 = ip_len0 - udp_len0;
-         len_diff1 = ip_len1 - udp_len1;
-
          len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
          len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
 
@@ -1382,6 +1476,7 @@ ip6_local (vlib_main_t * vm,
          u32 pi0, ip_len0, udp_len0, flags0, next0;
          i32 len_diff0;
          u8 error0, type0, good_l4_checksum0;
+          u32 udp_offset0;
 
          pi0 = to_next[0] = from[0];
          from += 1;
@@ -1399,20 +1494,28 @@ ip6_local (vlib_main_t * vm,
          flags0 = p0->flags;
 
          good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+         len_diff0 = 0;
 
-         udp0 = ip6_next_header (ip0);
-
-         /* Don't verify UDP checksum for packets with explicit zero checksum. */
-         good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+         /* Skip HBH local processing */
+          if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t  *)ip6_next_header(ip0);
+             next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+             type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+           }
+         if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0,
+                                                          IP_PROTOCOL_UDP, &udp_offset0)))
+           {
+             udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0);
+             /* Don't verify UDP checksum for packets with explicit zero checksum. */
+             good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+             /* Verify UDP length. */
+             ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+             udp_len0 = clib_net_to_host_u16 (udp0->length);
+             len_diff0 = ip_len0 - udp_len0;
+           }
 
          good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
-
-         /* Verify UDP length. */
-         ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
-         udp_len0 = clib_net_to_host_u16 (udp0->length);
-
-         len_diff0 = ip_len0 - udp_len0;
-
          len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
 
          if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
index 456c011..d29a069 100644 (file)
@@ -408,6 +408,32 @@ typedef CLIB_PACKED (struct {
   u16 value;
 }) ip6_router_alert_option_t;
 
+typedef CLIB_PACKED (struct {
+  u8 next_hdr;
+  /* Length field: 8-byte units beyond the first 8 bytes (AH: 4-byte units beyond the first 8). */
+  u8 n_data_u64s;
+}) ip6_ext_header_t;
+
+always_inline u8 ip6_ext_hdr(u8 nexthdr)
+{
+  /*
+   * Non-zero for HbH, NoNxt, Fragment, AH, Route, DestOpts; else zero.
+   */
+  return   (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+    (nexthdr == IP_PROTOCOL_IP6_NONXT) ||
+    (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION)  ||
+    (nexthdr == IP_PROTOCOL_IPSEC_AH)      ||
+    (nexthdr == IP_PROTOCOL_IPV6_ROUTE)      ||
+    (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS);
+}
+
+#define ip6_ext_header_len(p)  (((p)->n_data_u64s+1) << 3) /* bytes: (length field + 1) * 8 */
+#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2) /* bytes: (length field + 2) * 4 */
+
+always_inline void *
+ip6_ext_next_header (ip6_ext_header_t *ext_hdr )
+{ return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); } /* address ip6_ext_header_len() bytes past ext_hdr */
+
 typedef CLIB_PACKED (struct {
   u8 next_hdr;
   /* Length of this header plus option data in 8 byte units. */
index 287d521..5d0275d 100644 (file)
@@ -360,8 +360,6 @@ sr_rewrite (vlib_main_t * vm,
            vlib_node_runtime_t * node, vlib_frame_t * from_frame)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  ip6_main_t *im = &ip6_main;
-  ip_lookup_main_t *lm = &im->lookup_main;
   ip6_sr_main_t *sm = &sr_main;
   u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *,
                      vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *);
@@ -384,7 +382,6 @@ sr_rewrite (vlib_main_t * vm,
          u32 bi0, bi1;
          vlib_buffer_t *b0, *b1;
          ip6_header_t *ip0, *ip1;
-         ip_adjacency_t *adj0, *adj1;
          ip6_sr_header_t *sr0, *sr1;
          ip6_sr_tunnel_t *t0, *t1;
          u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
@@ -419,15 +416,12 @@ sr_rewrite (vlib_main_t * vm,
           * $$$ parse through header(s) to pick the point
           * where we punch in the SR extention header
           */
-
-         adj0 =
-           ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
-         adj1 =
-           ip_get_adjacency (lm, vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
          t0 =
-           pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index);
+           pool_elt_at_index (sm->tunnels,
+                              vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
          t1 =
-           pool_elt_at_index (sm->tunnels, adj1->rewrite_header.sw_if_index);
+           pool_elt_at_index (sm->tunnels,
+                              vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
 
          ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
                  >= ((word) vec_len (t0->rewrite)) + b0->current_data);
@@ -439,6 +433,16 @@ sr_rewrite (vlib_main_t * vm,
 
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b1);
+#if DPDK > 0                   /* Cannot call replication node yet without DPDK */
+         /* add a replication node */
+         if (PREDICT_FALSE (t0->policy_index != ~0))
+           {
+             vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
+             next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+             sr0 = (ip6_sr_header_t *) (t0->rewrite);
+             goto processnext;
+           }
+#endif /* DPDK */
 
          /*
           * SR-unaware service chaining case: pkt coming back from
@@ -454,22 +458,41 @@ sr_rewrite (vlib_main_t * vm,
            }
          else
            {
+             u32 len_bytes = sizeof (ip6_header_t);
+             u8 next_hdr = ip0->protocol;
+
+             /* HBH must immediately follow ipv6 header */
+             if (PREDICT_FALSE
+                 (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+                 len_bytes +=
+                   ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+                 /* Ignoring the sr_local for now, if RH follows HBH here */
+                 next_hdr = ext_hdr->next_hdr;
+                 ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+               }
+             else
+               {
+                 ip0->protocol = IPPROTO_IPV6_ROUTE;   /* routing extension header */
+               }
              /*
               * Copy data before the punch-in point left by the
               * required amount. Assume (for the moment) that only
               * the main packet header needs to be copied.
               */
              clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
-                          ip0, sizeof (ip6_header_t));
+                          ip0, len_bytes);
              vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
              ip0 = vlib_buffer_get_current (b0);
-             sr0 = (ip6_sr_header_t *) (ip0 + 1);
+             sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
              /* $$$ tune */
              clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
 
              /* Fix the next header chain */
-             sr0->protocol = ip0->protocol;
-             ip0->protocol = IPPROTO_IPV6_ROUTE;       /* routing extension header */
+             sr0->protocol = next_hdr;
+
              new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
                vec_len (t0->rewrite);
              ip0->payload_length = clib_host_to_net_u16 (new_l0);
@@ -496,7 +519,17 @@ sr_rewrite (vlib_main_t * vm,
                    b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
                }
            }
-
+#if DPDK > 0                   /* Cannot call replication node yet without DPDK */
+       processnext:
+         /* add a replication node */
+         if (PREDICT_FALSE (t1->policy_index != ~0))
+           {
+             vnet_buffer (b1)->ip.save_protocol = t1->policy_index;
+             next1 = SR_REWRITE_NEXT_SR_REPLICATE;
+             sr1 = (ip6_sr_header_t *) (t1->rewrite);
+             goto trace00;
+           }
+#endif /* DPDK */
          if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE))
            {
              vlib_buffer_advance (b1, sizeof (ip1));
@@ -506,15 +539,38 @@ sr_rewrite (vlib_main_t * vm,
            }
          else
            {
-             clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
-                          ip0, sizeof (ip6_header_t));
+             u32 len_bytes = sizeof (ip6_header_t);
+             u8 next_hdr = ip1->protocol;
+
+             /* HBH must immediately follow ipv6 header */
+             if (PREDICT_FALSE
+                 (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+                 len_bytes +=
+                   ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+                 /* Ignoring the sr_local for now, if RH follows HBH here */
+                 next_hdr = ext_hdr->next_hdr;
+                 ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+               }
+             else
+               {
+                 ip1->protocol = IPPROTO_IPV6_ROUTE;
+               }
+             /*
+              * Copy data before the punch-in point left by the
+              * required amount. Assume (for the moment) that only
+              * the main packet header needs to be copied.
+              */
+             clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite),
+                          ip1, len_bytes);
              vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite));
              ip1 = vlib_buffer_get_current (b1);
-             sr1 = (ip6_sr_header_t *) (ip1 + 1);
+             sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes);
              clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite));
 
-             sr1->protocol = ip1->protocol;
-             ip1->protocol = IPPROTO_IPV6_ROUTE;
+             sr1->protocol = next_hdr;
              new_l1 = clib_net_to_host_u16 (ip1->payload_length) +
                vec_len (t1->rewrite);
              ip1->payload_length = clib_host_to_net_u16 (new_l1);
@@ -541,6 +597,9 @@ sr_rewrite (vlib_main_t * vm,
                    b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
                }
            }
+#if DPDK > 0                   /* Cannot run replicate without DPDK and only replicate uses this label */
+       trace00:
+#endif /* DPDK */
 
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
@@ -553,7 +612,8 @@ sr_rewrite (vlib_main_t * vm,
                           sizeof (tr->dst.as_u8));
              tr->length = new_l0;
              tr->next_index = next0;
-             clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+             if (sr0)
+               clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
            }
          if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
            {
@@ -566,9 +626,9 @@ sr_rewrite (vlib_main_t * vm,
                           sizeof (tr->dst.as_u8));
              tr->length = new_l1;
              tr->next_index = next1;
-             clib_memcpy (tr->sr, sr1, sizeof (tr->sr));
+             if (sr1)
+               clib_memcpy (tr->sr, sr1, sizeof (tr->sr));
            }
-
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
@@ -579,7 +639,6 @@ sr_rewrite (vlib_main_t * vm,
          u32 bi0;
          vlib_buffer_t *b0;
          ip6_header_t *ip0 = 0;
-         ip_adjacency_t *adj0;
          ip6_sr_header_t *sr0 = 0;
          ip6_sr_tunnel_t *t0;
          u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
@@ -594,22 +653,21 @@ sr_rewrite (vlib_main_t * vm,
 
          b0 = vlib_get_buffer (vm, bi0);
 
+
          /*
           * $$$ parse through header(s) to pick the point
           * where we punch in the SR extention header
           */
-
-         adj0 =
-           ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
          t0 =
-           pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index);
-
+           pool_elt_at_index (sm->tunnels,
+                              vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
 #if DPDK > 0                   /* Cannot call replication node yet without DPDK */
          /* add a replication node */
          if (PREDICT_FALSE (t0->policy_index != ~0))
            {
              vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
              next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+             sr0 = (ip6_sr_header_t *) (t0->rewrite);
              goto trace0;
            }
 #endif /* DPDK */
@@ -635,22 +693,40 @@ sr_rewrite (vlib_main_t * vm,
            }
          else
            {
+             u32 len_bytes = sizeof (ip6_header_t);
+             u8 next_hdr = ip0->protocol;
+
+             /* HBH must immediately follow ipv6 header */
+             if (PREDICT_FALSE
+                 (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+                 len_bytes +=
+                   ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+                 next_hdr = ext_hdr->next_hdr;
+                 ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+                 /* Ignoring the sr_local for now, if RH follows HBH here */
+               }
+             else
+               {
+                 ip0->protocol = IPPROTO_IPV6_ROUTE;   /* routing extension header */
+               }
              /*
               * Copy data before the punch-in point left by the
               * required amount. Assume (for the moment) that only
               * the main packet header needs to be copied.
               */
              clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
-                          ip0, sizeof (ip6_header_t));
+                          ip0, len_bytes);
              vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
              ip0 = vlib_buffer_get_current (b0);
-             sr0 = (ip6_sr_header_t *) (ip0 + 1);
+             sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
              /* $$$ tune */
              clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
 
              /* Fix the next header chain */
-             sr0->protocol = ip0->protocol;
-             ip0->protocol = IPPROTO_IPV6_ROUTE;       /* routing extension header */
+             sr0->protocol = next_hdr;
              new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
                vec_len (t0->rewrite);
              ip0->payload_length = clib_host_to_net_u16 (new_l0);
@@ -677,10 +753,10 @@ sr_rewrite (vlib_main_t * vm,
                    b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
                }
            }
-
 #if DPDK > 0                   /* Cannot run replicate without DPDK and only replicate uses this label */
        trace0:
 #endif /* DPDK */
+
          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
@@ -695,14 +771,13 @@ sr_rewrite (vlib_main_t * vm,
                }
              tr->length = new_l0;
              tr->next_index = next0;
-             clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+             if (sr0)
+               clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
            }
-
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }
-
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
   return from_frame->n_vectors;
@@ -2462,6 +2537,7 @@ sr_local (vlib_main_t * vm,
          ip6_address_t *new_dst0, *new_dst1;
          u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP;
          u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP;
+
          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;
@@ -2489,6 +2565,15 @@ sr_local (vlib_main_t * vm,
          b0 = vlib_get_buffer (vm, bi0);
          ip0 = vlib_buffer_get_current (b0);
          sr0 = (ip6_sr_header_t *) (ip0 + 1);
+         if (PREDICT_FALSE
+             (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr =
+               (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+             sr0 =
+               (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+                                                        ext_hdr);
+           }
 
          if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR))
            {
@@ -2550,13 +2635,26 @@ sr_local (vlib_main_t * vm,
            {
              u64 *copy_dst0, *copy_src0;
              u16 new_l0;
+             u32 copy_len_u64s0 = 0;
+             int i;
+
              /*
               * Copy the ip6 header right by the (real) length of the
-              * sr header. Here's another place which assumes that
-              * the sr header is the only extention header.
+              * sr header.
               */
-
-             ip0->protocol = sr0->protocol;
+             if (PREDICT_FALSE
+                 (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+                 copy_len_u64s0 =
+                   (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+                 ext_hdr->next_hdr = sr0->protocol;
+               }
+             else
+               {
+                 ip0->protocol = sr0->protocol;
+               }
              vlib_buffer_advance (b0, (sr0->length + 1) * 8);
 
              new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
@@ -2566,11 +2664,16 @@ sr_local (vlib_main_t * vm,
              copy_src0 = (u64 *) ip0;
              copy_dst0 = copy_src0 + (sr0->length + 1);
 
-             copy_dst0[4] = copy_src0[4];
-             copy_dst0[3] = copy_src0[3];
-             copy_dst0[2] = copy_src0[2];
-             copy_dst0[1] = copy_src0[1];
-             copy_dst0[0] = copy_src0[0];
+             copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+             copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+             copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+             copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+             copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+             for (i = copy_len_u64s0 - 1; i >= 0; i--)
+               {
+                 copy_dst0[i] = copy_src0[i];
+               }
 
              sr0 = 0;
            }
@@ -2594,6 +2697,16 @@ sr_local (vlib_main_t * vm,
          b1 = vlib_get_buffer (vm, bi1);
          ip1 = vlib_buffer_get_current (b1);
          sr1 = (ip6_sr_header_t *) (ip1 + 1);
+         if (PREDICT_FALSE
+             (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+
+             ip6_hop_by_hop_ext_t *ext_hdr =
+               (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+             sr1 =
+               (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+                                                        ext_hdr);
+           }
 
          if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR))
            {
@@ -2655,13 +2768,26 @@ sr_local (vlib_main_t * vm,
            {
              u64 *copy_dst1, *copy_src1;
              u16 new_l1;
+             u32 copy_len_u64s1 = 0;
+             int i;
+
              /*
               * Copy the ip6 header right by the (real) length of the
-              * sr header. Here's another place which assumes that
-              * the sr header is the only extention header.
+              * sr header.
               */
-
-             ip1->protocol = sr1->protocol;
+             if (PREDICT_FALSE
+                 (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+                 copy_len_u64s1 =
+                   (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+                 ext_hdr->next_hdr = sr1->protocol;
+               }
+             else
+               {
+                 ip1->protocol = sr1->protocol;
+               }
              vlib_buffer_advance (b1, (sr1->length + 1) * 8);
 
              new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
@@ -2671,11 +2797,16 @@ sr_local (vlib_main_t * vm,
              copy_src1 = (u64 *) ip1;
              copy_dst1 = copy_src1 + (sr1->length + 1);
 
-             copy_dst1[4] = copy_src1[4];
-             copy_dst1[3] = copy_src1[3];
-             copy_dst1[2] = copy_src1[2];
-             copy_dst1[1] = copy_src1[1];
-             copy_dst1[0] = copy_src1[0];
+             copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1];
+             copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1];
+             copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1];
+             copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1];
+             copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1];
+
+             for (i = copy_len_u64s1 - 1; i >= 0; i--)
+               {
+                 copy_dst1[i] = copy_src1[i];
+               }
 
              sr1 = 0;
            }
@@ -2721,6 +2852,15 @@ sr_local (vlib_main_t * vm,
          ip0 = vlib_buffer_get_current (b0);
          sr0 = (ip6_sr_header_t *) (ip0 + 1);
 
+         if (PREDICT_FALSE
+             (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr =
+               (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+             sr0 =
+               (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+                                                        ext_hdr);
+           }
          if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR))
            {
              next0 = SR_LOCAL_NEXT_ERROR;
@@ -2781,13 +2921,27 @@ sr_local (vlib_main_t * vm,
            {
              u64 *copy_dst0, *copy_src0;
              u16 new_l0;
+             u32 copy_len_u64s0 = 0;
+             int i;
+
              /*
               * Copy the ip6 header right by the (real) length of the
-              * sr header. Here's another place which assumes that
-              * the sr header is the only extention header.
+              * sr header.
               */
+             if (PREDICT_FALSE
+                 (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+               {
+                 ip6_hop_by_hop_ext_t *ext_hdr =
+                   (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+                 copy_len_u64s0 =
+                   (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+                 ext_hdr->next_hdr = sr0->protocol;
+               }
+             else
+               {
+                 ip0->protocol = sr0->protocol;
+               }
 
-             ip0->protocol = sr0->protocol;
              vlib_buffer_advance (b0, (sr0->length + 1) * 8);
 
              new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
@@ -2796,12 +2950,16 @@ sr_local (vlib_main_t * vm,
 
              copy_src0 = (u64 *) ip0;
              copy_dst0 = copy_src0 + (sr0->length + 1);
+             copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+             copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+             copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+             copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+             copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
 
-             copy_dst0[4] = copy_src0[4];
-             copy_dst0[3] = copy_src0[3];
-             copy_dst0[2] = copy_src0[2];
-             copy_dst0[1] = copy_src0[1];
-             copy_dst0[0] = copy_src0[0];
+             for (i = copy_len_u64s0 - 1; i >= 0; i--)
+               {
+                 copy_dst0[i] = copy_src0[i];
+               }
 
              sr0 = 0;
            }
index 9aa5787..34cde3d 100644 (file)
@@ -175,6 +175,8 @@ sr_replicate_node_fn (vlib_main_t * vm,
          ip6_header_t *ip0 = 0, *hdr_ip0 = 0;
          int num_replicas = 0;
          int i;
+         u32 len_bytes = sizeof (ip6_header_t);
+         u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE;
 
          bi0 = from[0];
 
@@ -187,6 +189,24 @@ sr_replicate_node_fn (vlib_main_t * vm,
          ip0 = vlib_buffer_get_current (b0);
          /* Skip forward to the punch-in point */
          vlib_buffer_advance (b0, sizeof (*ip0));
+         next_hdr = ip0->protocol;
+
+         /* HBH must immediately follow ipv6 header */
+         if (PREDICT_FALSE
+             (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+           {
+             ip6_hop_by_hop_ext_t *ext_hdr =
+               (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+             u32 ext_hdr_len = 0;
+             ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+             len_bytes += ext_hdr_len;
+             next_hdr = ext_hdr->next_hdr;
+             ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+             ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+             /* Skip forward to the punch-in point */
+             vlib_buffer_advance (b0, ext_hdr_len);
+
+           }
 
          orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
 
@@ -198,8 +218,7 @@ sr_replicate_node_fn (vlib_main_t * vm,
 
          orig_mb0->data_len = new_data_len0;
          orig_mb0->pkt_len = new_pkt_len0;
-         orig_mb0->data_off =
-           (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
+         orig_mb0->data_off += (u16) (b0->current_data);
 
          /*
             Before entering loop determine if we can allocate:
@@ -222,14 +241,49 @@ sr_replicate_node_fn (vlib_main_t * vm,
 
          for (i = 0; i < num_replicas; i++)
            {
+             uint8_t nb_seg;
+             struct rte_mbuf *clone0i;
+             vlib_buffer_t *clone0_c, *clone_b0;
+
+             t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
              hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
 
              if (i < (num_replicas - 1))
-               /* Not the last tunnel to process */
-               clone0 = rte_pktmbuf_clone
-                 (orig_mb0, bm->pktmbuf_pools[socket_id]);
+               {
+                 /* Not the last tunnel to process */
+                 clone0 = rte_pktmbuf_clone
+                   (orig_mb0, bm->pktmbuf_pools[socket_id]);
+                 nb_seg = 0;
+                 clone0i = clone0;
+                 clone0_c = NULL;
+                 while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs))
+                   {
+
+                     clone_b0 = vlib_buffer_from_rte_mbuf (clone0i);
+                     vlib_buffer_init_for_free_list (clone_b0, fl);
+
+                     ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) ==
+                             0);
+                     ASSERT (clone_b0->current_data == 0);
+
+                     clone_b0->current_data =
+                       (clone0i->buf_addr + clone0i->data_off) -
+                       (void *) clone_b0->data;
+
+                     clone_b0->current_length = clone0i->data_len;
+                     if (PREDICT_FALSE (clone0_c != NULL))
+                       {
+                         clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT;
+                         clone0_c->next_buffer =
+                           vlib_get_buffer_index (vm, clone_b0);
+                       }
+                     clone0_c = clone_b0;
+                     clone0i = clone0i->next;
+                     nb_seg++;
+                   }
+               }
              else
-               /* Last tunnel to process, use original MB */
+               /* Last tunnel to process, use original MB */
                clone0 = orig_mb0;
 
 
@@ -260,14 +314,14 @@ sr_replicate_node_fn (vlib_main_t * vm,
          for (i = 0; i < num_replicas; i++)
            {
              vlib_buffer_t *hdr_b0;
+             u16 new_l0 = 0;
 
              t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
-
              /* Our replicas */
              hdr_mb0 = hdr_vec[i];
              clone0 = rte_mbuf_vec[i];
 
-             hdr_mb0->data_len = sizeof (*ip0) + vec_len (t0->rewrite);
+             hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite);
              hdr_mb0->pkt_len = hdr_mb0->data_len +
                vlib_buffer_length_in_chain (vm, orig_b0);
 
@@ -275,24 +329,33 @@ sr_replicate_node_fn (vlib_main_t * vm,
 
              vlib_buffer_init_for_free_list (hdr_b0, fl);
 
-             memcpy (hdr_b0->data, ip0, sizeof (*ip0));
-             memcpy (hdr_b0->data + sizeof (*ip0), t0->rewrite,
+             memcpy (hdr_b0->data, ip0, len_bytes);
+             memcpy (hdr_b0->data + len_bytes, t0->rewrite,
                      vec_len (t0->rewrite));
 
              hdr_b0->current_data = 0;
-             hdr_b0->current_length = sizeof (*ip0) + vec_len (t0->rewrite);
+             hdr_b0->current_length = len_bytes + vec_len (t0->rewrite);
              hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
-
+             hdr_b0->trace_index = orig_b0->trace_index;
+             vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0;
 
              hdr_b0->total_length_not_including_first_buffer =
                hdr_mb0->pkt_len - hdr_b0->current_length;
+             vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
 
              hdr_ip0 = (ip6_header_t *) hdr_b0->data;
-             hdr_ip0->payload_length =
-               clib_host_to_net_u16 (hdr_mb0->data_len);
-             hdr_sr0 = (ip6_sr_header_t *) (hdr_ip0 + 1);
-             hdr_sr0->protocol = hdr_ip0->protocol;
-             hdr_ip0->protocol = 43;
+             new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
+               vec_len (t0->rewrite);
+             hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0);
+             hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes);
+             /* $$$ tune */
+             clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite));
+             hdr_sr0->protocol = next_hdr;
+             hdr_ip0->protocol = ip_next_hdr;
+
+             /* Copy dst address into the DA slot in the segment list */
+             clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64,
+                          sizeof (ip6_address_t));
 
              /* Rewrite the ip6 dst address */
              hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
@@ -318,7 +381,7 @@ sr_replicate_node_fn (vlib_main_t * vm,
              hdr_mb0->tx_offload = clone0->tx_offload;
              hdr_mb0->hash = clone0->hash;
 
-             hdr_mb0->ol_flags = clone0->ol_flags;
+             hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF);
 
              __rte_mbuf_sanity_check (hdr_mb0, 1);