typedef enum
{
RANGE_NEW,
+ RANGE_DISCARD,
RANGE_OVERLAP,
ICMP_ERROR_RT_EXCEEDED,
ICMP_ERROR_FL_TOO_BIG,
s = format (s, "\n%Unew %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
break;
+ case RANGE_DISCARD:
+ s = format (s, "\n%Udiscard %U", format_white_space, indent,
+ format_ip6_full_reass_range_trace, &t->trace_range);
+ break;
case RANGE_OVERLAP:
s = format (s, "\n%Uoverlap %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
ip6_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next and to_next are taken as input parameters because this
+ * function may be called from a graph node that manages its own local copies
+ * of these variables; ignoring those and enqueueing the buffers through
+ * separate local variables would cause either a buffer leak or corruption. */
always_inline void
ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
- ip6_full_reass_t *reass, u32 offending_bi)
+ ip6_full_reass_t *reass, u32 *n_left_to_next,
+ u32 **to_next)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- if (bi == offending_bi)
- {
- offending_bi = ~0;
- }
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
range_bi = range_vnb->ip.reass.next_range_bi;
}
- if (~0 != offending_bi)
- {
- vec_add1 (to_free, offending_bi);
- }
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 next_index;
next_index = reass->error_next_index;
u32 bi = ~0;
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TO_CUSTOM_APP,
+ vec_len (to_free));
+
while (vec_len (to_free) > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, *to_next,
+ (*n_left_to_next));
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (vec_len (to_free) > 0 && (*n_left_to_next) > 0)
{
bi = vec_pop (to_free);
if (~0 != bi)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_full_reass_add_trace (vm, node, reass, bi, NULL,
+ RANGE_DISCARD, ~0);
+ }
+ *to_next[0] = bi;
+ (*to_next) += 1;
+ (*n_left_to_next) -= 1;
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_put_next_frame (vm, node, next_index, (*n_left_to_next));
}
}
else
}
always_inline void
-ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_t * reass, u32 * icmp_bi)
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip6_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
+
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
+ }
+ }
+ range_bi = range_vnb->ip.reass.next_range_bi;
+ }
+ }
+ if (*bi0 != ~0)
+ {
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
+ }
+}
+
+always_inline void
+ip6_full_reass_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_t *reass, u32 *icmp_bi,
+ u32 *n_left_to_next, u32 **to_next)
{
if (~0 == reass->first_bi)
{
0);
}
}
- ip6_full_reass_drop_all (vm, node, reass, ~0);
+ ip6_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next);
}
always_inline ip6_full_reass_t *
ip6_full_reass_main_t *rm,
ip6_full_reass_per_thread_t *rt,
ip6_full_reass_kv_t *kv, u32 *icmp_bi,
- u8 *do_handoff, int skip_bihash)
+ u8 *do_handoff, int skip_bihash,
+ u32 *n_left_to_next, u32 **to_next)
{
ip6_full_reass_t *reass;
f64 now;
if (now > reass->last_heard + rm->timeout)
{
- ip6_full_reass_on_timeout (vm, node, reass, icmp_bi);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT, 1);
+ ip6_full_reass_on_timeout (vm, node, reass, icmp_bi, n_left_to_next,
+ to_next);
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
}
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
- u32 dropped_cnt = 0;
u32 *vec_drop_compress = NULL;
ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
do
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
goto free_buffers_and_return;
}
vec_add1 (vec_drop_compress, tmp_bi);
- ++dropped_cnt;
}
if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
{
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP6_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
free_buffers_and_return:
}
always_inline bool
-ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
ip6_header_t *ip = vlib_buffer_get_current (b);
icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & ip->payload_length - (u8 *) ip);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & frag_hdr->fragment_offset_and_more
- (u8 *) ip0);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
b0 = vlib_get_buffer (vm, bi0);
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
- ip6_frag_hdr_t *frag_hdr;
+ ip6_frag_hdr_t *frag_hdr = NULL;
ip6_ext_hdr_chain_t hdr_chain;
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+
int res = ip6_ext_header_walk (
b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
if (res < 0 ||
hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
{
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_NO_FRAG_HDR, 1);
// this is a mangled packet - no fragmentation
- next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_DROP;
ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
~0);
goto skip_reass;
next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
goto skip_reass;
}
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_RCVD, 1);
frag_hdr =
ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
if (!ip6_full_reass_verify_upper_layer_present (node, b0,
&hdr_chain))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
}
- if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
- !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
+
+ if (!ip6_full_reass_verify_fragment_multiple_8 (vm, node, b0,
+ frag_hdr) ||
+ !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0,
+ frag_hdr))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
vnet_buffer (b0)->sw_if_index[VLIB_RX]))
<< 32 |
(u64) frag_hdr->identification;
- kv.k.as_u64[5] = ip0->protocol;
+ /* RFC 8200: The Next Header values in the Fragment headers of
+ * different fragments of the same original packet may differ.
+ * Only the value from the Offset zero fragment packet is used
+ * for reassembly.
+ *
+ * Also, unlike IPv4, the IPv6 header doesn't contain the protocol
+ * value. */
+ kv.k.as_u64[5] = 0;
}
ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
- vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
+ vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash,
+ &n_left_to_next, &to_next);
if (reass)
{
case IP6_FULL_REASS_RC_NO_BUF:
counter = IP6_ERROR_REASS_NO_BUF;
break;
- case IP6_FULL_REASS_RC_INTERNAL_ERROR:
- counter = IP6_ERROR_REASS_INTERNAL_ERROR;
- break;
case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
break;
case IP6_FULL_REASS_RC_OVERLAP:
counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
break;
+ case IP6_FULL_REASS_RC_INTERNAL_ERROR:
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed only in internal error cases, as
+ * the incoming packet is already dropped in the other cases.
+ * Adding bi0 back to the reassembly list also fixes the
+ * leaking of buffers during internal errors.
+ *
+ * It also doesn't make sense to send these buffers to the
+ * custom app, as these fragments hit internal errors. */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
}
if (~0 != counter)
{
vlib_node_increment_counter (vm, node->node_index, counter,
1);
- ip6_full_reass_drop_all (vm, node, reass, bi0);
+ ip6_full_reass_drop_all (vm, node, reass, &n_left_to_next,
+ &to_next);
ip6_full_reass_free (rm, rt, reass);
goto next_packet;
break;
}
else
{
- vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
next0 = fvnb->ip.reass.error_next_index;
}
error0 = IP6_ERROR_REASS_LIMIT_REACHED;
{
vnet_feature_next (&next0, b0);
}
+
+ /* Also increment the to-custom-app counter, as this fragment is
+ * going to the application as well. */
+ if (is_custom_app)
+ {
+ vlib_node_increment_counter (
+ vm, node->node_index, IP6_ERROR_REASS_TO_CUSTOM_APP, 1);
+ }
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
return frame->n_vectors;
}
-static char *ip6_full_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
.name = "ip6-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
.name = "ip6-local-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
.name = "ip6-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
.name = "ip6-full-reassembly-custom",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
int index;
const uword nthreads = vlib_num_workers () + 1;
u32 *vec_icmp_bi = NULL;
+ u32 n_left_to_next, *to_next;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip6_full_reass_per_thread_t *rt =
&rm->per_thread_data[thread_index];
+ u32 reass_timeout_cnt = 0;
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
{
ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
u32 icmp_bi = ~0;
- ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi);
+
+ reass_timeout_cnt += reass->fragments_n;
+ ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi,
+ &n_left_to_next, &to_next);
if (~0 != icmp_bi)
vec_add1 (vec_icmp_bi, icmp_bi);
}
clib_spinlock_unlock (&rt->lock);
+ if (reass_timeout_cnt)
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT,
+ reass_timeout_cnt);
}
while (vec_len (vec_icmp_bi) > 0)
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
trace_frame = 1;
- b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
to_next[0] = bi;
++f->n_vectors;
to_next += 1;
}
VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
- .function = ip6_full_reass_walk_expired,
- .format_trace = format_ip6_full_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-full-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .function = ip6_full_reass_walk_expired,
+ .format_trace = format_ip6_full_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-full-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
static u8 *