#define IP4_REASS_TIMEOUT_DEFAULT_MS 100
#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
#define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
+#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_REASS_HT_LOAD_FACTOR (0.75)
#define IP4_REASS_DEBUG_BUFFERS 0
typedef enum
{
IP4_REASS_RC_OK,
+ IP4_REASS_RC_TOO_MANY_FRAGMENTS,
IP4_REASS_RC_INTERNAL_ERROR,
IP4_REASS_RC_NO_BUF,
} ip4_reass_rc_t;
// trace operation counter
u32 trace_op_counter;
- // next index - used by non-feature node
- u8 next_index;
+ // next index - used by custom apps (~0 if not used)
+ u32 next_index;
+ // error next index - used by custom apps (~0 if not used)
+ u32 error_next_index;
// minimum fragment length for this reassembly - used to estimate MTU
u16 min_fragment_length;
-
+ // number of fragments in this reassembly
+ u32 fragments_n;
} ip4_reass_t;
typedef struct
u32 timeout_ms;
f64 timeout;
u32 expire_walk_interval_ms;
+ // maximum number of fragments in one reassembly
+ u32 max_reass_len;
+ // maximum number of reassemblies
u32 max_reass_n;
// IPv4 runtime
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
- if (pool_is_free_index (vm->trace_main.trace_buffer_pool, b->trace_index))
- {
- // this buffer's trace is gone
- b->flags &= ~VLIB_BUFFER_IS_TRACED;
- return;
- }
ip4_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
t->reass_id = reass->id;
t->action = action;
}
always_inline void
-ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
- ip4_reass_t * reass)
+ip4_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+ ip4_reass_main_t * rm, ip4_reass_t * reass)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
}
range_bi = range_vnb->ip.reass.next_range_bi;
}
- vlib_buffer_free (vm, to_free, vec_len (to_free));
- vec_free (to_free);
+ /* send to error_next_index */
+ if (~0 != reass->error_next_index)
+ {
+ u32 n_left_to_next, *to_next, next_index;
+
+ next_index = reass->error_next_index;
+ u32 bi = ~0;
+
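+ /* enqueue the dropped fragments to the error next node, filling frames as needed */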
+ while (vec_len (to_free) > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ {
+ bi = vec_pop (to_free);
+
+ if (~0 != bi)
+ {
+ to_next[0] = bi;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi, next_index);
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ }
+ else
+ {
+ vlib_buffer_free (vm, to_free, vec_len (to_free));
+ }
+ vec_free (to_free);
}
static ip4_reass_t *
-ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
- ip4_reass_per_thread_t * rt, ip4_reass_kv_t * kv,
- u8 * do_handoff)
+ip4_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
+ ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
+ ip4_reass_kv_t * kv, u8 * do_handoff)
{
ip4_reass_t *reass = NULL;
f64 now = vlib_time_now (rm->vlib_main);
if (now > reass->last_heard + rm->timeout)
{
- ip4_reass_on_timeout (vm, rm, reass);
+ ip4_reass_drop_all (vm, node, rm, reass);
ip4_reass_free (rm, rt, reass);
reass = NULL;
}
reass->first_bi = ~0;
reass->last_packet_octet = ~0;
reass->data_len = 0;
+ reass->next_index = ~0;
+ reass->error_next_index = ~0;
++rt->reass_n;
}
ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
- bool is_feature)
+ bool is_custom_app)
{
vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
vlib_buffer_t *last_b = NULL;
if (trim_front > tmp->current_length)
{
/* drop whole buffer */
- vlib_buffer_free_one (vm, tmp_bi);
+ u32 to_be_freed_bi = tmp_bi;
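+ /* free this buffer only after it is unlinked below, so the rest of the chain is not freed with it */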
trim_front -= tmp->current_length;
if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
{
}
tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
tmp_bi = tmp->next_buffer;
+ tmp->next_buffer = 0;
tmp = vlib_get_buffer (vm, tmp_bi);
+ vlib_buffer_free_one (vm, to_be_freed_bi);
continue;
}
else
}
}
total_length += tmp->current_length;
+ if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ tmp_bi = tmp->next_buffer;
+ tmp = vlib_get_buffer (vm, tmp->next_buffer);
+ }
+ else
+ {
+ break;
+ }
}
else
{
- vlib_buffer_free_one (vm, tmp_bi);
+ u32 to_be_freed_bi = tmp_bi;
if (reass->first_bi == tmp_bi)
{
return IP4_REASS_RC_INTERNAL_ERROR;
}
- }
- if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- tmp_bi = tmp->next_buffer;
- tmp = vlib_get_buffer (vm, tmp->next_buffer);
- }
- else
- {
- break;
+ if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ tmp_bi = tmp->next_buffer;
+ tmp->next_buffer = 0;
+ tmp = vlib_get_buffer (vm, tmp_bi);
+ vlib_buffer_free_one (vm, to_be_freed_bi);
+ }
+ else
+ {
+ tmp->next_buffer = 0;
+ vlib_buffer_free_one (vm, to_be_freed_bi);
+ break;
+ }
}
}
sub_chain_bi =
return IP4_REASS_RC_INTERNAL_ERROR;
}
last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
if (total_length < first_b->current_length)
{
return IP4_REASS_RC_INTERNAL_ERROR;
{
return IP4_REASS_RC_NO_BUF;
}
-
+ // clear EXT_HDR_VALID so the external (mbuf) chain is reconstructed to match the new buffer chain
+ first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0);
#endif
}
*bi0 = reass->first_bi;
- if (is_feature)
+ if (!is_custom_app)
{
*next0 = IP4_REASSEMBLY_NEXT_INPUT;
}
reass->data_len -= ip4_reass_buffer_get_data_len (discard_b);
while (1)
{
- vlib_buffer_free_one (vm, discard_bi);
+ u32 to_be_freed_bi = discard_bi;
if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_reass_add_trace (vm, node, rm, reass, discard_bi, RANGE_DISCARD,
{
discard_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
discard_bi = discard_b->next_buffer;
+ discard_b->next_buffer = 0;
discard_b = vlib_get_buffer (vm, discard_bi);
+ vlib_buffer_free_one (vm, to_be_freed_bi);
}
else
{
+ discard_b->next_buffer = 0;
+ vlib_buffer_free_one (vm, to_be_freed_bi);
break;
}
}
ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
- bool is_feature)
+ bool is_custom_app)
{
ip4_reass_rc_t rc = IP4_REASS_RC_OK;
int consumed = 0;
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
ip4_header_t *fip = vlib_buffer_get_current (fb);
vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
- reass->next_index = fvnb->ip.reass.next_index; // store next_index before it's overwritten
+ if (is_custom_app)
+ {
+ // store (error_)next_index before it's overwritten
+ reass->next_index = fvnb->ip.reass.next_index;
+ reass->error_next_index = fvnb->ip.reass.error_next_index;
+ }
const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
const u32 fragment_length =
clib_net_to_host_u16 (fip->length) - ip4_header_bytes (fip);
}
*bi0 = ~0;
reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
+ reass->fragments_n = 1;
return IP4_REASS_RC_OK;
}
reass->min_fragment_length = clib_min (clib_net_to_host_u16 (fip->length),
}
break;
}
+ ++reass->fragments_n;
if (consumed)
{
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
reass->data_len == reass->last_packet_octet + 1)
{
return ip4_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
- is_feature);
+ is_custom_app);
}
else
{
if (consumed)
{
*bi0 = ~0;
+ if (reass->fragments_n > rm->max_reass_len)
+ {
+ rc = IP4_REASS_RC_TOO_MANY_FRAGMENTS;
+ }
}
else
{
}
always_inline uword
-ip4_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip4_reassembly_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, bool is_feature,
+ bool is_custom_app)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, next_index;
if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
{
// this is a whole packet - no fragmentation
- if (is_feature)
+ if (!is_custom_app)
{
next0 = IP4_REASSEMBLY_NEXT_INPUT;
}
(u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
ip4_reass_t *reass =
- ip4_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
+ ip4_reass_find_or_create (vm, node, rm, rt, &kv,
+ &do_handoff);
if (PREDICT_FALSE (do_handoff))
{
{
switch (ip4_reass_update
(vm, node, rm, rt, reass, &bi0, &next0,
- &error0, is_feature))
+ &error0, is_custom_app))
{
case IP4_REASS_RC_OK:
/* nothing to do here */
break;
+ case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
+ 1);
+ ip4_reass_drop_all (vm, node, rm, reass);
+ ip4_reass_free (rm, rt, reass);
+ goto next_packet;
+ break;
case IP4_REASS_RC_NO_BUF:
- /* fallthrough */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_NO_BUF,
+ 1);
+ ip4_reass_drop_all (vm, node, rm, reass);
+ ip4_reass_free (rm, rt, reass);
+ goto next_packet;
+ break;
case IP4_REASS_RC_INTERNAL_ERROR:
/* drop everything and start with a clean slate */
- ip4_reass_on_timeout (vm, rm, reass);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_INTERNAL_ERROR,
+ 1);
+ ip4_reass_drop_all (vm, node, rm, reass);
ip4_reass_free (rm, rt, reass);
goto next_packet;
break;
VLIB_NODE_FN (ip4_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_reassembly_inline (vm, node, frame, false /* is_feature */ );
+ return ip4_reassembly_inline (vm, node, frame, false /* is_feature */ ,
+ false /* is_custom_app */ );
}
/* *INDENT-OFF* */
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_reassembly_inline (vm, node, frame, true /* is_feature */ );
+ return ip4_reassembly_inline (vm, node, frame, true /* is_feature */ ,
+ false /* is_custom_app */ );
}
/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_reassembly_feature, static) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup"),
+ .runs_before = VNET_FEATURES ("ip4-lookup",
+ "ipsec4-input-feature"),
.runs_after = 0,
};
/* *INDENT-ON* */
static void
ip4_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
- u32 expire_walk_interval_ms)
+ u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
ip4_reass_main.timeout_ms = timeout_ms;
ip4_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
ip4_reass_main.max_reass_n = max_reassemblies;
+ ip4_reass_main.max_reass_len = max_reassembly_length;
ip4_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
}
vnet_api_error_t
ip4_reass_set (u32 timeout_ms, u32 max_reassemblies,
- u32 expire_walk_interval_ms)
+ u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
u32 old_nbuckets = ip4_reass_get_nbuckets ();
- ip4_reass_set_params (timeout_ms, max_reassemblies,
+ ip4_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length,
expire_walk_interval_ms);
vlib_process_signal_event (ip4_reass_main.vlib_main,
ip4_reass_main.ip4_reass_expire_node_idx,
clib_bihash_free_16_8 (&ip4_reass_main.hash);
clib_memcpy_fast (&ip4_reass_main.hash, &new_hash,
sizeof (ip4_reass_main.hash));
+ clib_bihash_copied (&ip4_reass_main.hash, &new_hash);
}
}
return 0;
vnet_api_error_t
ip4_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
- u32 * expire_walk_interval_ms)
+ u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
{
*timeout_ms = ip4_reass_main.timeout_ms;
*max_reassemblies = ip4_reass_main.max_reass_n;
+ *max_reassembly_length = ip4_reass_main.max_reass_len;
*expire_walk_interval_ms = ip4_reass_main.expire_walk_interval_ms;
return 0;
}
ip4_reass_set_params (IP4_REASS_TIMEOUT_DEFAULT_MS,
IP4_REASS_MAX_REASSEMBLIES_DEFAULT,
+ IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
nbuckets = ip4_reass_get_nbuckets ();
vec_foreach (i, pool_indexes_to_free)
{
ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
- ip4_reass_on_timeout (vm, rm, reass);
+ ip4_reass_drop_all (vm, node, rm, reass);
ip4_reass_free (rm, rt, reass);
}
/* *INDENT-ON* */