ip: reassembly: drop zero length fragments
[vpp.git] / src / vnet / ip / reass / ip6_full_reass.c
index 9ec40cd..c9509e3 100644 (file)
@@ -25,6 +25,7 @@
 #include <vnet/ip/ip.h>
 #include <vppinfra/bihash_48_8.h>
 #include <vnet/ip/reass/ip6_full_reass.h>
+#include <vnet/ip/ip6_inlines.h>
 
 #define MSEC_PER_SEC 1000
 #define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
@@ -40,6 +41,7 @@ typedef enum
   IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
   IP6_FULL_REASS_RC_NO_BUF,
   IP6_FULL_REASS_RC_HANDOFF,
+  IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
 } ip6_full_reass_rc_t;
 
 typedef struct
@@ -497,11 +499,11 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
-                              ip6_full_reass_main_t * rm,
-                              ip6_full_reass_per_thread_t * rt,
-                              ip6_full_reass_kv_t * kv, u32 * icmp_bi,
-                              u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+                              ip6_full_reass_main_t *rm,
+                              ip6_full_reass_per_thread_t *rt,
+                              ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+                              u8 *do_handoff, int skip_bihash)
 {
   ip6_full_reass_t *reass;
   f64 now;
@@ -511,7 +513,7 @@ again:
   reass = NULL;
   now = vlib_time_now (vm);
 
-  if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+  if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
     {
       if (vm->thread_index != kv->v.memory_owner_thread_index)
        {
@@ -557,24 +559,37 @@ again:
       ++rt->reass_n;
     }
 
-  reass->key.as_u64[0] = kv->kv.key[0];
-  reass->key.as_u64[1] = kv->kv.key[1];
-  reass->key.as_u64[2] = kv->kv.key[2];
-  reass->key.as_u64[3] = kv->kv.key[3];
-  reass->key.as_u64[4] = kv->kv.key[4];
-  reass->key.as_u64[5] = kv->kv.key[5];
   kv->v.reass_index = (reass - rt->pool);
   kv->v.memory_owner_thread_index = vm->thread_index;
   reass->last_heard = now;
 
-  int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
-  if (rv)
+  if (!skip_bihash)
     {
-      ip6_full_reass_free (rm, rt, reass);
-      reass = NULL;
-      // if other worker created a context already work with the other copy
-      if (-2 == rv)
-       goto again;
+      reass->key.as_u64[0] = kv->kv.key[0];
+      reass->key.as_u64[1] = kv->kv.key[1];
+      reass->key.as_u64[2] = kv->kv.key[2];
+      reass->key.as_u64[3] = kv->kv.key[3];
+      reass->key.as_u64[4] = kv->kv.key[4];
+      reass->key.as_u64[5] = kv->kv.key[5];
+
+      int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+      if (rv)
+       {
+         ip6_full_reass_free (rm, rt, reass);
+         reass = NULL;
+         // if other worker created a context already work with the other copy
+         if (-2 == rv)
+           goto again;
+       }
+    }
+  else
+    {
+      reass->key.as_u64[0] = ~0;
+      reass->key.as_u64[1] = ~0;
+      reass->key.as_u64[2] = ~0;
+      reass->key.as_u64[3] = ~0;
+      reass->key.as_u64[4] = ~0;
+      reass->key.as_u64[5] = ~0;
     }
 
   return reass;
@@ -729,19 +744,27 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
   vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
   ip6_header_t *ip = vlib_buffer_get_current (first_b);
   u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
-  ip6_ext_header_t *prev_hdr;
-  frag_hdr =
-    ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
-                        &prev_hdr);
-  if (prev_hdr)
+  ip6_ext_hdr_chain_t hdr_chain;
+  ip6_ext_header_t *prev_hdr = 0;
+  int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
+                                &hdr_chain);
+  if (res < 0 ||
+      (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
+    {
+      rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
+      goto free_buffers_and_return;
+    }
+  frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
+  if (res > 0)
     {
+      prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
       prev_hdr->next_hdr = frag_hdr->next_hdr;
     }
   else
     {
       ip->protocol = frag_hdr->next_hdr;
     }
-  if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset))
+  if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
     {
       rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
       goto free_buffers_and_return;
@@ -834,12 +857,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
 }
 
 always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
-                      ip6_full_reass_main_t * rm,
-                      ip6_full_reass_per_thread_t * rt,
-                      ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
-                      u32 * error0, ip6_frag_hdr_t * frag_hdr,
-                      bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+                      ip6_full_reass_main_t *rm,
+                      ip6_full_reass_per_thread_t *rt,
+                      ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+                      u32 *error0, ip6_frag_hdr_t *frag_hdr,
+                      bool is_custom_app, u32 *handoff_thread_idx,
+                      int skip_bihash)
 {
   int consumed = 0;
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -865,6 +889,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
   u32 fragment_length =
     vlib_buffer_length_in_chain (vm, fb) -
     (fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+  if (0 == fragment_length)
+    {
+      return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
+    }
   u32 fragment_last = fvnb->ip.reass.fragment_last =
     fragment_first + fragment_length - 1;
   int more_fragments = ip6_frag_hdr_more (frag_hdr);
@@ -947,6 +975,12 @@ check_if_done_maybe:
                                    ~0);
        }
     }
+  else if (skip_bihash)
+    {
+      // if this reassembly is not in bihash, then the packet must have been
+      // consumed
+      return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+    }
   if (~0 != reass->last_packet_octet &&
       reass->data_len == reass->last_packet_octet + 1)
     {
@@ -964,6 +998,12 @@ check_if_done_maybe:
     }
   else
     {
+      if (skip_bihash)
+       {
+         // if this reassembly is not in bihash, it should've been an atomic
+         // fragment and thus finalized
+         return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+       }
       if (consumed)
        {
          *bi0 = ~0;
@@ -982,22 +1022,18 @@ check_if_done_maybe:
 }
 
 always_inline bool
-ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
-                                          vlib_buffer_t * b,
-                                          ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+                                          vlib_buffer_t *b,
+                                          ip6_ext_hdr_chain_t *hc)
 {
-  ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
-  while (ip6_ext_hdr (tmp->next_hdr))
-    {
-      tmp = ip6_ext_next_header (tmp);
-    }
-  if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
+  int nh = hc->eh[hc->length - 1].protocol;
+  /* Checking to see if it's a terminating header */
+  if (ip6_ext_hdr (nh))
     {
-      icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
-                                  ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
-                                  0);
+      icmp6_error_set_vnet_buffer (
+       b, ICMP6_parameter_problem,
+       ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
       b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
       return false;
     }
   return true;
@@ -1076,29 +1112,27 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
          b0 = vlib_get_buffer (vm, bi0);
 
          ip6_header_t *ip0 = vlib_buffer_get_current (b0);
-         ip6_frag_hdr_t *frag_hdr = NULL;
-         ip6_ext_header_t *prev_hdr;
-         if (ip6_ext_hdr (ip0->protocol))
-           {
-             frag_hdr =
-               ip6_ext_header_find (vm, b0, ip0,
-                                    IP_PROTOCOL_IPV6_FRAGMENTATION,
-                                    &prev_hdr);
-           }
-         if (!frag_hdr)
+         ip6_frag_hdr_t *frag_hdr;
+         ip6_ext_hdr_chain_t hdr_chain;
+         int res = ip6_ext_header_walk (
+           b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+         if (res < 0 ||
+             hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
            {
-             // this is a regular packet - no fragmentation
-             next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
+             // this is a mangled packet - no fragmentation
+             next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
              goto skip_reass;
            }
+         frag_hdr =
+           ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
          vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
-           (u8 *) frag_hdr - (u8 *) ip0;
+           hdr_chain.eh[res].offset;
 
          if (0 == ip6_frag_hdr_offset (frag_hdr))
            {
              // first fragment - verify upper-layer is present
-             if (!ip6_full_reass_verify_upper_layer_present
-                 (node, b0, frag_hdr))
+             if (!ip6_full_reass_verify_upper_layer_present (node, b0,
+                                                             &hdr_chain))
                {
                  next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
                  goto skip_reass;
@@ -1110,22 +1144,33 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
              next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
              goto skip_reass;
            }
+
+         int skip_bihash = 0;
          ip6_full_reass_kv_t kv;
          u8 do_handoff = 0;
 
-         kv.k.as_u64[0] = ip0->src_address.as_u64[0];
-         kv.k.as_u64[1] = ip0->src_address.as_u64[1];
-         kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
-         kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
-         kv.k.as_u64[4] =
-           ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
-                           vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
-           (u64) frag_hdr->identification;
-         kv.k.as_u64[5] = ip0->protocol;
+         if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+             !ip6_frag_hdr_more (frag_hdr))
+           {
+             // this is atomic fragment and needs to be processed separately
+             skip_bihash = 1;
+           }
+         else
+           {
+             kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+             kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+             kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+             kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+             kv.k.as_u64[4] =
+               ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+                               vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+                 << 32 |
+               (u64) frag_hdr->identification;
+             kv.k.as_u64[5] = ip0->protocol;
+           }
 
-         ip6_full_reass_t *reass =
-           ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
-                                          &do_handoff);
+         ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+           vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
 
          if (reass)
            {
@@ -1144,9 +1189,10 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
          else if (reass)
            {
              u32 handoff_thread_idx;
-             switch (ip6_full_reass_update
-                     (vm, node, rm, rt, reass, &bi0, &next0, &error0,
-                      frag_hdr, is_custom_app, &handoff_thread_idx))
+             u32 counter = ~0;
+             switch (ip6_full_reass_update (
+               vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+               is_custom_app, &handoff_thread_idx, skip_bihash))
                {
                case IP6_FULL_REASS_RC_OK:
                  /* nothing to do here */
@@ -1158,28 +1204,25 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
                    handoff_thread_idx;
                  break;
                case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
-                 vlib_node_increment_counter (vm, node->node_index,
-                                              IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
-                                              1);
-                 ip6_full_reass_drop_all (vm, node, reass);
-                 ip6_full_reass_free (rm, rt, reass);
-                 goto next_packet;
+                 counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
                  break;
                case IP6_FULL_REASS_RC_NO_BUF:
-                 vlib_node_increment_counter (vm, node->node_index,
-                                              IP6_ERROR_REASS_NO_BUF, 1);
-                 ip6_full_reass_drop_all (vm, node, reass);
-                 ip6_full_reass_free (rm, rt, reass);
-                 goto next_packet;
+                 counter = IP6_ERROR_REASS_NO_BUF;
                  break;
                case IP6_FULL_REASS_RC_INTERNAL_ERROR:
-                 vlib_node_increment_counter (vm, node->node_index,
-                                              IP6_ERROR_REASS_INTERNAL_ERROR,
+                 counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+                 break;
+               case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
+                 counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+                 break;
+               }
+             if (~0 != counter)
+               {
+                 vlib_node_increment_counter (vm, node->node_index, counter,
                                               1);
                  ip6_full_reass_drop_all (vm, node, reass);
                  ip6_full_reass_free (rm, rt, reass);
                  goto next_packet;
-                 break;
                }
            }
          else