ip: reassembly: handle atomic fragments correctly 00/34700/3
authorKlement Sekera <ksekera@cisco.com>
Tue, 7 Dec 2021 09:49:53 +0000 (09:49 +0000)
committerOle Trøan <otroan@employees.org>
Fri, 10 Dec 2021 08:41:00 +0000 (08:41 +0000)
If a fragment arrives with fragment offset = 0 and M = 0, it means that
this is actually a complete packet and per RFC 8200, it should be
treated independently from other fragments. This patch does that.
The fragmentation header is stripped and the fragment is forwarded regardless
of other existing reassemblies in the case of full reassembly, and it is treated
the same way as a regular packet in shallow virtual reassembly.

Type: improvement
Change-Id: If3322d5e3160cd755b8465a642702a9166d46cc2
Signed-off-by: Klement Sekera <ksekera@cisco.com>
src/vnet/ip/reass/ip6_full_reass.c
src/vnet/ip/reass/ip6_sv_reass.c
test/framework.py
test/test_reassembly.py

index 901da99..fc7fa18 100644 (file)
@@ -498,11 +498,11 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
-                              ip6_full_reass_main_t * rm,
-                              ip6_full_reass_per_thread_t * rt,
-                              ip6_full_reass_kv_t * kv, u32 * icmp_bi,
-                              u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+                              ip6_full_reass_main_t *rm,
+                              ip6_full_reass_per_thread_t *rt,
+                              ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+                              u8 *do_handoff, int skip_bihash)
 {
   ip6_full_reass_t *reass;
   f64 now;
@@ -512,7 +512,7 @@ again:
   reass = NULL;
   now = vlib_time_now (vm);
 
-  if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+  if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
     {
       if (vm->thread_index != kv->v.memory_owner_thread_index)
        {
@@ -558,24 +558,37 @@ again:
       ++rt->reass_n;
     }
 
-  reass->key.as_u64[0] = kv->kv.key[0];
-  reass->key.as_u64[1] = kv->kv.key[1];
-  reass->key.as_u64[2] = kv->kv.key[2];
-  reass->key.as_u64[3] = kv->kv.key[3];
-  reass->key.as_u64[4] = kv->kv.key[4];
-  reass->key.as_u64[5] = kv->kv.key[5];
   kv->v.reass_index = (reass - rt->pool);
   kv->v.memory_owner_thread_index = vm->thread_index;
   reass->last_heard = now;
 
-  int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
-  if (rv)
+  if (!skip_bihash)
     {
-      ip6_full_reass_free (rm, rt, reass);
-      reass = NULL;
-      // if other worker created a context already work with the other copy
-      if (-2 == rv)
-       goto again;
+      reass->key.as_u64[0] = kv->kv.key[0];
+      reass->key.as_u64[1] = kv->kv.key[1];
+      reass->key.as_u64[2] = kv->kv.key[2];
+      reass->key.as_u64[3] = kv->kv.key[3];
+      reass->key.as_u64[4] = kv->kv.key[4];
+      reass->key.as_u64[5] = kv->kv.key[5];
+
+      int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+      if (rv)
+       {
+         ip6_full_reass_free (rm, rt, reass);
+         reass = NULL;
+         // if other worker created a context already work with the other copy
+         if (-2 == rv)
+           goto again;
+       }
+    }
+  else
+    {
+      reass->key.as_u64[0] = ~0;
+      reass->key.as_u64[1] = ~0;
+      reass->key.as_u64[2] = ~0;
+      reass->key.as_u64[3] = ~0;
+      reass->key.as_u64[4] = ~0;
+      reass->key.as_u64[5] = ~0;
     }
 
   return reass;
@@ -843,12 +856,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
 }
 
 always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
-                      ip6_full_reass_main_t * rm,
-                      ip6_full_reass_per_thread_t * rt,
-                      ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
-                      u32 * error0, ip6_frag_hdr_t * frag_hdr,
-                      bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+                      ip6_full_reass_main_t *rm,
+                      ip6_full_reass_per_thread_t *rt,
+                      ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+                      u32 *error0, ip6_frag_hdr_t *frag_hdr,
+                      bool is_custom_app, u32 *handoff_thread_idx,
+                      int skip_bihash)
 {
   int consumed = 0;
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -956,6 +970,12 @@ check_if_done_maybe:
                                    ~0);
        }
     }
+  else if (skip_bihash)
+    {
+      // if this reassembly is not in bihash, then the packet must have been
+      // consumed
+      return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+    }
   if (~0 != reass->last_packet_octet &&
       reass->data_len == reass->last_packet_octet + 1)
     {
@@ -973,6 +993,12 @@ check_if_done_maybe:
     }
   else
     {
+      if (skip_bihash)
+       {
+         // if this reassembly is not in bihash, it should've been an atomic
+         // fragment and thus finalized
+         return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+       }
       if (consumed)
        {
          *bi0 = ~0;
@@ -1113,22 +1139,33 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
              next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
              goto skip_reass;
            }
+
+         int skip_bihash = 0;
          ip6_full_reass_kv_t kv;
          u8 do_handoff = 0;
 
-         kv.k.as_u64[0] = ip0->src_address.as_u64[0];
-         kv.k.as_u64[1] = ip0->src_address.as_u64[1];
-         kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
-         kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
-         kv.k.as_u64[4] =
-           ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
-                           vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
-           (u64) frag_hdr->identification;
-         kv.k.as_u64[5] = ip0->protocol;
+         if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+             !ip6_frag_hdr_more (frag_hdr))
+           {
+             // this is atomic fragment and needs to be processed separately
+             skip_bihash = 1;
+           }
+         else
+           {
+             kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+             kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+             kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+             kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+             kv.k.as_u64[4] =
+               ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+                               vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+                 << 32 |
+               (u64) frag_hdr->identification;
+             kv.k.as_u64[5] = ip0->protocol;
+           }
 
-         ip6_full_reass_t *reass =
-           ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
-                                          &do_handoff);
+         ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+           vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
 
          if (reass)
            {
@@ -1148,9 +1185,9 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
            {
              u32 handoff_thread_idx;
              u32 counter = ~0;
-             switch (ip6_full_reass_update
-                     (vm, node, rm, rt, reass, &bi0, &next0, &error0,
-                      frag_hdr, is_custom_app, &handoff_thread_idx))
+             switch (ip6_full_reass_update (
+               vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+               is_custom_app, &handoff_thread_idx, skip_bihash))
                {
                case IP6_FULL_REASS_RC_OK:
                  /* nothing to do here */
index fb435ba..3656c5a 100644 (file)
@@ -215,7 +215,7 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
                clib_net_to_host_u16 (t->l4_dst_port));
       break;
     case REASS_PASSTHROUGH:
-      s = format (s, "[not-fragmented]");
+      s = format (s, "[not fragmented or atomic fragment]");
       break;
     }
   return s;
@@ -532,13 +532,24 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
          ip6_header_t *ip0 = vlib_buffer_get_current (b0);
          ip6_frag_hdr_t *frag_hdr;
          ip6_ext_hdr_chain_t hdr_chain;
+         bool is_atomic_fragment = false;
 
          int res = ip6_ext_header_walk (
            b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+         if (res >= 0 &&
+             hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+           {
+             frag_hdr =
+               ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+             is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
+                                   !ip6_frag_hdr_more (frag_hdr));
+           }
+
          if (res < 0 ||
-             hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
+             hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
+             is_atomic_fragment)
            {
-             // this is a regular packet - no fragmentation
+             // this is a regular unfragmented packet or an atomic fragment
              if (!ip6_get_port
                  (vm, b0, ip0, b0->current_length,
                   &(vnet_buffer (b0)->ip.reass.ip_proto),
@@ -565,10 +576,10 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
                }
              goto packet_enqueue;
            }
-         frag_hdr =
-           ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+
          vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
            hdr_chain.eh[res].offset;
+
          if (0 == ip6_frag_hdr_offset (frag_hdr))
            {
              // first fragment - verify upper-layer is present
index 02e455b..f4b168b 100644 (file)
@@ -1293,7 +1293,8 @@ class VppTestCase(CPUInterface, unittest.TestCase):
         self.pg_enable_capture(self.pg_interfaces)
         self.pg_start(trace=trace)
 
-    def send_and_assert_no_replies(self, intf, pkts, remark="", timeout=None):
+    def send_and_assert_no_replies(self, intf, pkts, remark="", timeout=None,
+                                   trace=True):
         self.pg_send(intf, pkts)
         if not timeout:
             timeout = 1
@@ -1301,6 +1302,8 @@ class VppTestCase(CPUInterface, unittest.TestCase):
             i.get_capture(0, timeout=timeout)
             i.assert_nothing_captured(remark=remark)
             timeout = 0.1
+        if trace:
+            self.logger.debug(self.vapi.cli("show trace"))
 
     def send_and_expect(self, intf, pkts, output, n_rx=None, worker=None,
                         trace=True):
index 254b2ae..bd622a9 100644 (file)
@@ -1448,13 +1448,54 @@ class TestIPv6Reassembly(VppTestCase):
                     nh=44, plen=2) /
                IPv6ExtHdrFragment(nh=6))
 
-        self.send_and_assert_no_replies(self.pg0, [pkt], self.pg0)
+        self.send_and_assert_no_replies(self.pg0, [pkt])
 
         pkt = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
                IPv6(src=self.pg0.remote_ip6, dst=self.pg0.remote_ip6) /
                ICMPv6EchoRequest())
         rx = self.send_and_expect(self.pg0, [pkt], self.pg0)
 
+    def test_one_fragment(self):
+        """ whole packet in one fragment processed independently """
+        pkt = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
+               IPv6(src=self.pg0.remote_ip6, dst=self.pg0.local_ip6) /
+               ICMPv6EchoRequest()/Raw('X' * 1600))
+        frags = fragment_rfc8200(pkt, 1, 400)
+
+        # send a fragment with known id
+        self.send_and_assert_no_replies(self.pg0, [frags[0]])
+
+        # send an atomic fragment with same id - should be reassembled
+        pkt = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
+               IPv6(src=self.pg0.remote_ip6, dst=self.pg0.local_ip6) /
+               IPv6ExtHdrFragment(id=1)/ICMPv6EchoRequest())
+        rx = self.send_and_expect(self.pg0, [pkt], self.pg0)
+        self.assertNotIn(IPv6ExtHdrFragment, rx)
+
+        # now finish the original reassembly, this should still be possible
+        rx = self.send_and_expect(self.pg0, frags[1:], self.pg0, n_rx=1)
+        self.assertNotIn(IPv6ExtHdrFragment, rx)
+
+    def test_bunch_of_fragments(self):
+        """ valid fragments followed by rogue fragments and atomic fragment"""
+        pkt = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
+               IPv6(src=self.pg0.remote_ip6, dst=self.pg0.local_ip6) /
+               ICMPv6EchoRequest()/Raw('X' * 1600))
+        frags = fragment_rfc8200(pkt, 1, 400)
+        self.send_and_expect(self.pg0, frags, self.pg0, n_rx=1)
+
+        inc_frag = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
+                    IPv6(src=self.pg0.remote_ip6, dst=self.pg0.local_ip6) /
+                    IPv6ExtHdrFragment(id=1, nh=58, offset=608)/Raw('X'*308))
+
+        self.send_and_assert_no_replies(self.pg0, inc_frag*604)
+
+        pkt = (Ether(src=self.pg0.local_mac, dst=self.pg0.remote_mac) /
+               IPv6(src=self.pg0.remote_ip6, dst=self.pg0.local_ip6) /
+               IPv6ExtHdrFragment(id=1)/ICMPv6EchoRequest())
+        rx = self.send_and_expect(self.pg0, [pkt], self.pg0)
+        self.assertNotIn(IPv6ExtHdrFragment, rx)
+
 
 class TestIPv6MWReassembly(VppTestCase):
     """ IPv6 Reassembly (multiple workers) """
@@ -1838,6 +1879,44 @@ class TestIPv6SVReassembly(VppTestCase):
             self.assertEqual(sent[IPv6].dst, recvd[IPv6].dst)
             self.assertEqual(sent[Raw].payload, recvd[Raw].payload)
 
+    def test_one_fragment(self):
+        """ whole packet in one fragment processed independently """
+        pkt = (Ether(src=self.src_if.local_mac, dst=self.src_if.remote_mac) /
+               IPv6(src=self.src_if.remote_ip6, dst=self.dst_if.remote_ip6) /
+               ICMPv6EchoRequest()/Raw('X' * 1600))
+        frags = fragment_rfc8200(pkt, 1, 400)
+
+        # send a fragment with known id
+        self.send_and_expect(self.src_if, [frags[0]], self.dst_if)
+
+        # send an atomic fragment with same id - should be reassembled
+        pkt = (Ether(src=self.src_if.local_mac, dst=self.src_if.remote_mac) /
+               IPv6(src=self.src_if.remote_ip6, dst=self.dst_if.remote_ip6) /
+               IPv6ExtHdrFragment(id=1)/ICMPv6EchoRequest())
+        rx = self.send_and_expect(self.src_if, [pkt], self.dst_if)
+
+        # now forward packets matching original reassembly, should still work
+        rx = self.send_and_expect(self.src_if, frags[1:], self.dst_if)
+
+    def test_bunch_of_fragments(self):
+        """ valid fragments followed by rogue fragments and atomic fragment"""
+        pkt = (Ether(src=self.src_if.local_mac, dst=self.src_if.remote_mac) /
+               IPv6(src=self.src_if.remote_ip6, dst=self.dst_if.remote_ip6) /
+               ICMPv6EchoRequest()/Raw('X' * 1600))
+        frags = fragment_rfc8200(pkt, 1, 400)
+        rx = self.send_and_expect(self.src_if, frags, self.dst_if)
+
+        rogue = (Ether(src=self.src_if.local_mac, dst=self.src_if.remote_mac) /
+                 IPv6(src=self.src_if.remote_ip6, dst=self.dst_if.remote_ip6) /
+                 IPv6ExtHdrFragment(id=1, nh=58, offset=608)/Raw('X'*308))
+
+        self.send_and_expect(self.src_if, rogue*604, self.dst_if)
+
+        pkt = (Ether(src=self.src_if.local_mac, dst=self.src_if.remote_mac) /
+               IPv6(src=self.src_if.remote_ip6, dst=self.dst_if.remote_ip6) /
+               IPv6ExtHdrFragment(id=1)/ICMPv6EchoRequest())
+        rx = self.send_and_expect(self.src_if, [pkt], self.dst_if)
+
 
 class TestIPv4ReassemblyLocalNode(VppTestCase):
     """ IPv4 Reassembly for packets coming to ip4-local node """