reassembly: support more custom options for apps 12/19712/5
author Klement Sekera <ksekera@cisco.com>
Mon, 20 May 2019 10:27:33 +0000 (12:27 +0200)
committer Ole Trøan <otroan@employees.org>
Thu, 6 Jun 2019 08:28:29 +0000 (08:28 +0000)
Change-Id: Ib9f98fba5a724480ca95f11a762002c53e08df70
Signed-off-by: Klement Sekera <ksekera@cisco.com>
src/vnet/buffer.h
src/vnet/ip/ip4_reassembly.c
src/vnet/ip/ip6_reassembly.c

index 2144ed3..5114dc6 100644 (file)
@@ -189,6 +189,7 @@ typedef struct
          struct
          {
            u32 next_index;     /* index of next node - ignored if "feature" node */
+           u32 error_next_index;       /* index of next node if error - ignored if 'feature' node */
            u16 estimated_mtu;  /* estimated MTU calculated during reassembly */
            u16 owner_thread_index;
          };
index 763229c..73a83a9 100644 (file)
@@ -132,7 +132,11 @@ typedef struct
   // trace operation counter
   u32 trace_op_counter;
   // next index - used by non-feature node
-  u8 next_index;
+  u32 next_index;
+  // error next index - used by non-feature node
+  u32 error_next_index;
+  // is_feature flag stored for non-inline code use
+  bool is_feature;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
   // number of fragments in this reassembly
@@ -332,8 +336,9 @@ ip4_reass_free (ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
 }
 
 always_inline void
-ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
-                     ip4_reass_t * reass)
+ip4_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   ip4_reass_main_t * rm, ip4_reass_t * reass,
+                   bool is_feature)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -360,14 +365,46 @@ ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
        }
       range_bi = range_vnb->ip.reass.next_range_bi;
     }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
-  vec_free (to_free);
+  /* send to error_next_index (non-feature) or free (feature) */
+  if (!(is_feature))
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+       {
+         vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+         while (vec_len (to_free) > 0 && n_left_to_next > 0)
+           {
+             bi = vec_pop (to_free);
+
+             if (~0 != bi)
+               {
+                 to_next[0] = bi;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                  to_next, n_left_to_next,
+                                                  bi, next_index);
+               }
+           }
+         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+       }
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
 }
 
 static ip4_reass_t *
-ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
-                         ip4_reass_per_thread_t * rt, ip4_reass_kv_t * kv,
-                         u8 * do_handoff)
+ip4_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
+                         ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
+                         ip4_reass_kv_t * kv, u8 * do_handoff,
+                         bool is_feature)
 {
   ip4_reass_t *reass = NULL;
   f64 now = vlib_time_now (rm->vlib_main);
@@ -384,7 +421,7 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
 
       if (now > reass->last_heard + rm->timeout)
        {
-         ip4_reass_on_timeout (vm, rm, reass);
+         ip4_reass_drop_all (vm, node, rm, reass, is_feature);
          ip4_reass_free (rm, rt, reass);
          reass = NULL;
        }
@@ -410,6 +447,7 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->is_feature = is_feature;
       ++rt->reass_n;
     }
 
@@ -1020,7 +1058,8 @@ ip4_reassembly_inline (vlib_main_t * vm,
                    (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
 
                  ip4_reass_t *reass =
-                   ip4_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
+                   ip4_reass_find_or_create (vm, node, rm, rt, &kv,
+                                             &do_handoff, is_feature);
 
                  if (PREDICT_FALSE (do_handoff))
                    {
@@ -1046,7 +1085,8 @@ ip4_reassembly_inline (vlib_main_t * vm,
                          vlib_node_increment_counter (vm, node->node_index,
                                                       IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
                                                       1);
-                         ip4_reass_on_timeout (vm, rm, reass);
+                         ip4_reass_drop_all (vm, node, rm, reass,
+                                             is_feature);
                          ip4_reass_free (rm, rt, reass);
                          goto next_packet;
                          break;
@@ -1054,15 +1094,18 @@ ip4_reassembly_inline (vlib_main_t * vm,
                          vlib_node_increment_counter (vm, node->node_index,
                                                       IP4_ERROR_REASS_NO_BUF,
                                                       1);
-                         ip4_reass_on_timeout (vm, rm, reass);
+                         ip4_reass_drop_all (vm, node, rm, reass,
+                                             is_feature);
                          ip4_reass_free (rm, rt, reass);
                          goto next_packet;
                          break;
                        case IP4_REASS_RC_INTERNAL_ERROR:
+                         /* drop everything and start with a clean slate */
                          vlib_node_increment_counter (vm, node->node_index,
                                                       IP4_ERROR_REASS_INTERNAL_ERROR,
                                                       1);
-                         ip4_reass_on_timeout (vm, rm, reass);
+                         ip4_reass_drop_all (vm, node, rm, reass,
+                                             is_feature);
                          ip4_reass_free (rm, rt, reass);
                          goto next_packet;
                          break;
@@ -1369,7 +1412,7 @@ ip4_reass_walk_expired (vlib_main_t * vm,
           vec_foreach (i, pool_indexes_to_free)
           {
             ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
-            ip4_reass_on_timeout (vm, rm, reass);
+            ip4_reass_drop_all (vm, node, rm, reass, reass->is_feature);
             ip4_reass_free (rm, rt, reass);
           }
           /* *INDENT-ON* */
index cb1cd9a..01f76aa 100644 (file)
@@ -111,7 +111,11 @@ typedef struct
   // trace operation counter
   u32 trace_op_counter;
   // next index - used by non-feature node
-  u8 next_index;
+  u32 next_index;
+  // error next index - used by non-feature node
+  u32 error_next_index;
+  // is_feature flag stored for non-inline code use
+  bool is_feature;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
   // number of fragments for this reassembly
@@ -321,8 +325,9 @@ ip6_reass_free (ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
 }
 
 always_inline void
-ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
-                   ip6_reass_t * reass)
+ip6_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   ip6_reass_main_t * rm, ip6_reass_t * reass,
+                   bool is_feature)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -349,50 +354,86 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
        }
       range_bi = range_vnb->ip.reass.next_range_bi;
     }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
+  /* send to error_next_index (non-feature) or free (feature) */
+  if (!(is_feature))
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+       {
+         vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+         while (vec_len (to_free) > 0 && n_left_to_next > 0)
+           {
+             bi = vec_pop (to_free);
+
+             if (~0 != bi)
+               {
+                 to_next[0] = bi;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                  to_next, n_left_to_next,
+                                                  bi, next_index);
+               }
+           }
+         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+       }
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
   vec_free (to_free);
 }
 
 always_inline void
 ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
                      ip6_reass_main_t * rm, ip6_reass_t * reass,
-                     u32 * icmp_bi)
+                     u32 * icmp_bi, bool is_feature)
 {
   if (~0 == reass->first_bi)
     {
       return;
     }
-  vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
-  if (0 == vnet_buffer (b)->ip.reass.fragment_first)
+  if (is_feature)
     {
-      *icmp_bi = reass->first_bi;
-      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
-       {
-         ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
-                              ICMP_ERROR_RT_EXCEEDED, 0);
-       }
-      // fragment with offset zero received - send icmp message back
-      if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+      vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
+      if (0 == vnet_buffer (b)->ip.reass.fragment_first)
        {
-         // separate first buffer from chain and steer it towards icmp node
-         b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
-         reass->first_bi = b->next_buffer;
-       }
-      else
-       {
-         reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+         *icmp_bi = reass->first_bi;
+         if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
+                                  ICMP_ERROR_RT_EXCEEDED, 0);
+           }
+         // fragment with offset zero received - send icmp message back
+         if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             // separate first buffer from chain and steer it towards icmp node
+             b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+             reass->first_bi = b->next_buffer;
+           }
+         else
+           {
+             reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+           }
+         icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
+                                      ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
+                                      0);
        }
-      icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
-                                  ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
-                                  0);
     }
-  ip6_reass_drop_all (vm, rm, reass);
+  ip6_reass_drop_all (vm, node, rm, reass, is_feature);
 }
 
 always_inline ip6_reass_t *
 ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
                          ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
-                         ip6_reass_kv_t * kv, u32 * icmp_bi, u8 * do_handoff)
+                         ip6_reass_kv_t * kv, u32 * icmp_bi, u8 * do_handoff,
+                         bool is_feature)
 {
   ip6_reass_t *reass = NULL;
   f64 now = vlib_time_now (rm->vlib_main);
@@ -409,7 +450,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
 
       if (now > reass->last_heard + rm->timeout)
        {
-         ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi);
+         ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi, is_feature);
          ip6_reass_free (rm, rt, reass);
          reass = NULL;
        }
@@ -435,6 +476,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->is_feature = is_feature;
       ++rt->reass_n;
     }
 
@@ -718,6 +760,8 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
   reass->next_index = fvnb->ip.reass.next_index;       // store next_index before it's overwritten
+  reass->error_next_index = fvnb->ip.reass.error_next_index;   // store error_next_index before it is overwritten
+
   fvnb->ip.reass.ip6_frag_hdr_offset =
     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
   ip6_header_t *fip = vlib_buffer_get_current (fb);
@@ -793,7 +837,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
       else
        {
          // overlapping fragment - not allowed by RFC 8200
-         ip6_reass_drop_all (vm, rm, reass);
+         ip6_reass_drop_all (vm, node, rm, reass, is_feature);
          ip6_reass_free (rm, rt, reass);
          if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
            {
@@ -983,7 +1027,7 @@ ip6_reassembly_inline (vlib_main_t * vm,
 
          ip6_reass_t *reass =
            ip6_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
-                                     &do_handoff);
+                                     &do_handoff, is_feature);
 
          if (PREDICT_FALSE (do_handoff))
            {
@@ -1007,22 +1051,23 @@ ip6_reassembly_inline (vlib_main_t * vm,
                  vlib_node_increment_counter (vm, node->node_index,
                                               IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
                                               1);
-                 ip6_reass_drop_all (vm, rm, reass);
+                 ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                  ip6_reass_free (rm, rt, reass);
                  goto next_packet;
                  break;
                case IP6_REASS_RC_NO_BUF:
                  vlib_node_increment_counter (vm, node->node_index,
                                               IP6_ERROR_REASS_NO_BUF, 1);
-                 ip6_reass_drop_all (vm, rm, reass);
+                 ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                  ip6_reass_free (rm, rt, reass);
                  goto next_packet;
                  break;
                case IP6_REASS_RC_INTERNAL_ERROR:
+                 /* drop everything and start with a clean slate */
                  vlib_node_increment_counter (vm, node->node_index,
                                               IP6_ERROR_REASS_INTERNAL_ERROR,
                                               1);
-                 ip6_reass_drop_all (vm, rm, reass);
+                 ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                  ip6_reass_free (rm, rt, reass);
                  goto next_packet;
                  break;
@@ -1030,7 +1075,15 @@ ip6_reassembly_inline (vlib_main_t * vm,
            }
          else
            {
-             next0 = IP6_REASSEMBLY_NEXT_DROP;
+             if (is_feature)
+               {
+                 next0 = IP6_REASSEMBLY_NEXT_DROP;
+               }
+             else
+               {
+                 vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+                 next0 = fvnb->ip.reass.error_next_index;
+               }
              error0 = IP6_ERROR_REASS_LIMIT_REACHED;
            }
 
@@ -1354,7 +1407,7 @@ ip6_reass_walk_expired (vlib_main_t * vm,
                     b->flags &= ~VLIB_BUFFER_IS_TRACED;
                   }
               }
-            ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi);
+            ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi, reass->is_feature);
             if (~0 != icmp_bi)
               {
                 vec_add1 (vec_icmp_bi, icmp_bi);