vlib: packet tracer support for pkt thread handoffs
[vpp.git] / src / vnet / ip / ip6_reassembly.c
index 0c86145..a8b6d2c 100644 (file)
 #define IP6_REASS_TIMEOUT_DEFAULT_MS 100
 #define IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000        // 10 seconds default
 #define IP6_REASS_MAX_REASSEMBLIES_DEFAULT 1024
+#define IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
 #define IP6_REASS_HT_LOAD_FACTOR (0.75)
 
 typedef enum
 {
   IP6_REASS_RC_OK,
   IP6_REASS_RC_INTERNAL_ERROR,
+  IP6_REASS_RC_TOO_MANY_FRAGMENTS,
   IP6_REASS_RC_NO_BUF,
 } ip6_reass_rc_t;
 
@@ -108,10 +110,14 @@ typedef struct
   u32 data_len;
   // trace operation counter
   u32 trace_op_counter;
-  // next index - used by non-feature node
-  u8 next_index;
+  // next index - used by custom apps (~0 if not set)
+  u32 next_index;
+  // error next index - used by custom apps (~0 if not set)
+  u32 error_next_index;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
+  // number of fragments for this reassembly
+  u32 fragments_n;
 } ip6_reass_t;
 
 typedef struct
@@ -128,6 +134,9 @@ typedef struct
   u32 timeout_ms;
   f64 timeout;
   u32 expire_walk_interval_ms;
+  // maximum number of fragments in one reassembly
+  u32 max_reass_len;
+  // maximum number of reassemblies
   u32 max_reass_n;
 
   // IPv6 runtime
@@ -151,7 +160,11 @@ typedef struct
 
 } ip6_reass_main_t;
 
+extern ip6_reass_main_t ip6_reass_main;
+
+#ifndef CLIB_MARCH_VARIANT
 ip6_reass_main_t ip6_reass_main;
+#endif /* CLIB_MARCH_VARIANT */
 
 typedef enum
 {
@@ -267,12 +280,6 @@ ip6_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
-  if (pool_is_free_index (vm->trace_main.trace_buffer_pool, b->trace_index))
-    {
-      // this buffer's trace is gone
-      b->flags &= ~VLIB_BUFFER_IS_TRACED;
-      return;
-    }
   ip6_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
   t->reass_id = reass->id;
   t->action = action;
@@ -310,8 +317,8 @@ ip6_reass_free (ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
 }
 
 always_inline void
-ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
-                   ip6_reass_t * reass)
+ip6_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   ip6_reass_main_t * rm, ip6_reass_t * reass)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -338,7 +345,39 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
        }
       range_bi = range_vnb->ip.reass.next_range_bi;
     }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
+  /* send to error_next_index */
+  if (~0 != reass->error_next_index)
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+       {
+         vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+         while (vec_len (to_free) > 0 && n_left_to_next > 0)
+           {
+             bi = vec_pop (to_free);
+
+             if (~0 != bi)
+               {
+                 to_next[0] = bi;
+                 to_next += 1;
+                 n_left_to_next -= 1;
+                 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                  to_next, n_left_to_next,
+                                                  bi, next_index);
+               }
+           }
+         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+       }
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
   vec_free (to_free);
 }
 
@@ -351,31 +390,34 @@ ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
     {
       return;
     }
-  vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
-  if (0 == vnet_buffer (b)->ip.reass.fragment_first)
+  if (~0 == reass->next_index) // custom apps don't want icmp
     {
-      *icmp_bi = reass->first_bi;
-      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
-       {
-         ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
-                              ICMP_ERROR_RT_EXCEEDED, 0);
-       }
-      // fragment with offset zero received - send icmp message back
-      if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
-       {
-         // separate first buffer from chain and steer it towards icmp node
-         b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
-         reass->first_bi = b->next_buffer;
-       }
-      else
+      vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
+      if (0 == vnet_buffer (b)->ip.reass.fragment_first)
        {
-         reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+         *icmp_bi = reass->first_bi;
+         if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
+                                  ICMP_ERROR_RT_EXCEEDED, 0);
+           }
+         // fragment with offset zero received - send icmp message back
+         if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             // separate first buffer from chain and steer it towards icmp node
+             b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+             reass->first_bi = b->next_buffer;
+           }
+         else
+           {
+             reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+           }
+         icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
+                                      ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
+                                      0);
        }
-      icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
-                                  ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
-                                  0);
     }
-  ip6_reass_drop_all (vm, rm, reass);
+  ip6_reass_drop_all (vm, node, rm, reass);
 }
 
 always_inline ip6_reass_t *
@@ -424,6 +466,8 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->next_index = ~0;
+      reass->error_next_index = ~0;
       ++rt->reass_n;
     }
 
@@ -450,7 +494,7 @@ always_inline ip6_reass_rc_t
 ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
                    ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
                    ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
-                   bool is_feature)
+                   bool is_custom_app)
 {
   *bi0 = reass->first_bi;
   *error0 = IP6_ERROR_NONE;
@@ -622,6 +666,7 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
       rv = IP6_REASS_RC_NO_BUF;
       goto free_buffers_and_return;
     }
+  first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
   if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
     {
       ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0);
@@ -652,7 +697,7 @@ ip6_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
       while (0);
 #endif
     }
-  if (is_feature)
+  if (!is_custom_app)
     {
       *next0 = IP6_REASSEMBLY_NEXT_INPUT;
     }
@@ -700,12 +745,17 @@ always_inline ip6_reass_rc_t
 ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
                  ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
                  ip6_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
-                 ip6_frag_hdr_t * frag_hdr, bool is_feature)
+                 ip6_frag_hdr_t * frag_hdr, bool is_custom_app)
 {
   int consumed = 0;
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
-  reass->next_index = fvnb->ip.reass.next_index;       // store next_index before it's overwritten
+  if (is_custom_app)
+    {
+      reass->next_index = fvnb->ip.reass.next_index;   // store next_index before it's overwritten
+      reass->error_next_index = fvnb->ip.reass.error_next_index;       // store error_next_index before it is overwritten
+    }
+
   fvnb->ip.reass.ip6_frag_hdr_offset =
     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
   ip6_header_t *fip = vlib_buffer_get_current (fb);
@@ -740,6 +790,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
                                       *bi0);
       reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length);
       consumed = 1;
+      reass->fragments_n = 1;
       goto check_if_done_maybe;
     }
   reass->min_fragment_length =
@@ -780,7 +831,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
       else
        {
          // overlapping fragment - not allowed by RFC 8200
-         ip6_reass_drop_all (vm, rm, reass);
+         ip6_reass_drop_all (vm, node, rm, reass);
          ip6_reass_free (rm, rt, reass);
          if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
            {
@@ -793,6 +844,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
        }
       break;
     }
+  ++reass->fragments_n;
 check_if_done_maybe:
   if (consumed)
     {
@@ -805,13 +857,17 @@ check_if_done_maybe:
       reass->data_len == reass->last_packet_octet + 1)
     {
       return ip6_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
-                                is_feature);
+                                is_custom_app);
     }
   else
     {
       if (consumed)
        {
          *bi0 = ~0;
+         if (reass->fragments_n > rm->max_reass_len)
+           {
+             return IP6_REASS_RC_TOO_MANY_FRAGMENTS;
+           }
        }
       else
        {
@@ -890,9 +946,9 @@ ip6_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
 }
 
 always_inline uword
-ip6_reassembly_inline (vlib_main_t * vm,
-                      vlib_node_runtime_t * node,
-                      vlib_frame_t * frame, bool is_feature)
+ip6_reassembly_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      vlib_frame_t * frame, bool is_feature,
+                      bool is_custom_app)
 {
   u32 *from = vlib_frame_vector_args (frame);
   u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -980,16 +1036,32 @@ ip6_reassembly_inline (vlib_main_t * vm,
          else if (reass)
            {
              switch (ip6_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
-                                       &error0, frag_hdr, is_feature))
+                                       &error0, frag_hdr, is_custom_app))
                {
                case IP6_REASS_RC_OK:
                  /* nothing to do here */
                  break;
+               case IP6_REASS_RC_TOO_MANY_FRAGMENTS:
+                 vlib_node_increment_counter (vm, node->node_index,
+                                              IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
+                                              1);
+                 ip6_reass_drop_all (vm, node, rm, reass);
+                 ip6_reass_free (rm, rt, reass);
+                 goto next_packet;
+                 break;
                case IP6_REASS_RC_NO_BUF:
-                 /* fallthrough */
+                 vlib_node_increment_counter (vm, node->node_index,
+                                              IP6_ERROR_REASS_NO_BUF, 1);
+                 ip6_reass_drop_all (vm, node, rm, reass);
+                 ip6_reass_free (rm, rt, reass);
+                 goto next_packet;
+                 break;
                case IP6_REASS_RC_INTERNAL_ERROR:
                  /* drop everything and start with a clean slate */
-                 ip6_reass_drop_all (vm, rm, reass);
+                 vlib_node_increment_counter (vm, node->node_index,
+                                              IP6_ERROR_REASS_INTERNAL_ERROR,
+                                              1);
+                 ip6_reass_drop_all (vm, node, rm, reass);
                  ip6_reass_free (rm, rt, reass);
                  goto next_packet;
                  break;
@@ -997,7 +1069,15 @@ ip6_reassembly_inline (vlib_main_t * vm,
            }
          else
            {
-             next0 = IP6_REASSEMBLY_NEXT_DROP;
+             if (is_feature)
+               {
+                 next0 = IP6_REASSEMBLY_NEXT_DROP;
+               }
+             else
+               {
+                 vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+                 next0 = fvnb->ip.reass.error_next_index;
+               }
              error0 = IP6_ERROR_REASS_LIMIT_REACHED;
            }
 
@@ -1046,16 +1126,15 @@ static char *ip6_reassembly_error_strings[] = {
 #undef _
 };
 
-static uword
-ip6_reassembly (vlib_main_t * vm, vlib_node_runtime_t * node,
-               vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+                              vlib_frame_t * frame)
 {
-  return ip6_reassembly_inline (vm, node, frame, false /* is_feature */ );
+  return ip6_reassembly_inline (vm, node, frame, false /* is_feature */ ,
+                               false /* is_custom_app */ );
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_reass_node, static) = {
-    .function = ip6_reassembly,
+VLIB_REGISTER_NODE (ip6_reass_node) = {
     .name = "ip6-reassembly",
     .vector_size = sizeof (u32),
     .format_trace = format_ip6_reass_trace,
@@ -1072,18 +1151,16 @@ VLIB_REGISTER_NODE (ip6_reass_node, static) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_reass_node, ip6_reassembly);
-
-static uword
-ip6_reassembly_feature (vlib_main_t * vm,
-                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_reass_node_feature) (vlib_main_t * vm,
+                                      vlib_node_runtime_t * node,
+                                      vlib_frame_t * frame)
 {
-  return ip6_reassembly_inline (vm, node, frame, true /* is_feature */ );
+  return ip6_reassembly_inline (vm, node, frame, true /* is_feature */ ,
+                               false /* is_custom_app */ );
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_reass_node_feature, static) = {
-    .function = ip6_reassembly_feature,
+VLIB_REGISTER_NODE (ip6_reass_node_feature) = {
     .name = "ip6-reassembly-feature",
     .vector_size = sizeof (u32),
     .format_trace = format_ip6_reass_trace,
@@ -1100,8 +1177,6 @@ VLIB_REGISTER_NODE (ip6_reass_node_feature, static) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_reass_node_feature, ip6_reassembly_feature);
-
 /* *INDENT-OFF* */
 VNET_FEATURE_INIT (ip6_reassembly_feature, static) = {
     .arc_name = "ip6-unicast",
@@ -1111,6 +1186,7 @@ VNET_FEATURE_INIT (ip6_reassembly_feature, static) = {
 };
 /* *INDENT-ON* */
 
+#ifndef CLIB_MARCH_VARIANT
 static u32
 ip6_reass_get_nbuckets ()
 {
@@ -1127,12 +1203,14 @@ ip6_reass_get_nbuckets ()
 
   return nbuckets;
 }
+#endif /* CLIB_MARCH_VARIANT */
 
 typedef enum
 {
   IP6_EVENT_CONFIG_CHANGED = 1,
 } ip6_reass_event_t;
 
+#ifndef CLIB_MARCH_VARIANT
 typedef struct
 {
   int failure;
@@ -1151,20 +1229,21 @@ ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx)
 
 static void
 ip6_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
-                     u32 expire_walk_interval_ms)
+                     u32 max_reassembly_length, u32 expire_walk_interval_ms)
 {
   ip6_reass_main.timeout_ms = timeout_ms;
   ip6_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
   ip6_reass_main.max_reass_n = max_reassemblies;
+  ip6_reass_main.max_reass_len = max_reassembly_length;
   ip6_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
 }
 
 vnet_api_error_t
 ip6_reass_set (u32 timeout_ms, u32 max_reassemblies,
-              u32 expire_walk_interval_ms)
+              u32 max_reassembly_length, u32 expire_walk_interval_ms)
 {
   u32 old_nbuckets = ip6_reass_get_nbuckets ();
-  ip6_reass_set_params (timeout_ms, max_reassemblies,
+  ip6_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length,
                        expire_walk_interval_ms);
   vlib_process_signal_event (ip6_reass_main.vlib_main,
                             ip6_reass_main.ip6_reass_expire_node_idx,
@@ -1231,6 +1310,7 @@ ip6_reass_init_function (vlib_main_t * vm)
 
   ip6_reass_set_params (IP6_REASS_TIMEOUT_DEFAULT_MS,
                        IP6_REASS_MAX_REASSEMBLIES_DEFAULT,
+                       IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
                        IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
 
   nbuckets = ip6_reass_get_nbuckets ();
@@ -1256,6 +1336,7 @@ ip6_reass_init_function (vlib_main_t * vm)
 }
 
 VLIB_INIT_FUNCTION (ip6_reass_init_function);
+#endif /* CLIB_MARCH_VARIANT */
 
 static uword
 ip6_reass_walk_expired (vlib_main_t * vm,
@@ -1312,21 +1393,10 @@ ip6_reass_walk_expired (vlib_main_t * vm,
           {
             ip6_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
             u32 icmp_bi = ~0;
-            vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
-            if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
-              {
-                if (pool_is_free_index (vm->trace_main.trace_buffer_pool,
-                                        b->trace_index))
-                  {
-                    /* the trace is gone, don't trace this buffer anymore */
-                    b->flags &= ~VLIB_BUFFER_IS_TRACED;
-                  }
-              }
             ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi);
             if (~0 != icmp_bi)
-              {
-                vec_add1 (vec_icmp_bi, icmp_bi);
-              }
+              vec_add1 (vec_icmp_bi, icmp_bi);
+
             ip6_reass_free (rm, rt, reass);
           }
           /* *INDENT-ON* */
@@ -1346,18 +1416,7 @@ ip6_reass_walk_expired (vlib_main_t * vm,
              u32 bi = vec_pop (vec_icmp_bi);
              vlib_buffer_t *b = vlib_get_buffer (vm, bi);
              if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
-               {
-                 if (pool_is_free_index (vm->trace_main.trace_buffer_pool,
-                                         b->trace_index))
-                   {
-                     /* the trace is gone, don't trace this buffer anymore */
-                     b->flags &= ~VLIB_BUFFER_IS_TRACED;
-                   }
-                 else
-                   {
-                     trace_frame = 1;
-                   }
-               }
+               trace_frame = 1;
              b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
              to_next[0] = bi;
              ++f->n_vectors;
@@ -1379,8 +1438,6 @@ ip6_reass_walk_expired (vlib_main_t * vm,
   return 0;
 }
 
-static vlib_node_registration_t ip6_reass_expire_node;
-
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (ip6_reass_expire_node, static) = {
     .function = ip6_reass_walk_expired,
@@ -1493,12 +1550,14 @@ VLIB_CLI_COMMAND (show_ip6_reassembly_cmd, static) = {
 };
 /* *INDENT-ON* */
 
+#ifndef CLIB_MARCH_VARIANT
 vnet_api_error_t
 ip6_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
 {
   return vnet_feature_enable_disable ("ip6-unicast", "ip6-reassembly-feature",
                                      sw_if_index, enable_disable, 0, 0);
 }
+#endif /* CLIB_MARCH_VARIANT */
 
 #define foreach_ip6_reassembly_handoff_error                       \
 _(CONGESTION_DROP, "congestion drop")