From 21aa8f1022590b8b5caf819b4bbd485de0f1dfe5 Mon Sep 17 00:00:00 2001
From: Klement Sekera
Date: Mon, 20 May 2019 12:27:33 +0200
Subject: [PATCH] reassembly: support more custom options for apps

Change-Id: Ib9f98fba5a724480ca95f11a762002c53e08df70
Signed-off-by: Klement Sekera
---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):
 - ip4_reass_drop_all: `vec_free (to_free)` is kept as a context line, the
   same way the ip6 hunk does, so the temporary vector is released on both
   the feature and the non-feature path. The hunk header, the new-side
   offsets of the following ip4 hunks and the diffstat are adjusted for it;
   the ip4 post-image blob id on the "index" line is left as originally
   posted and no longer matches.
 - NOTE(review): no ip4 hunk assigns reass->error_next_index, although
   ip4_reass_drop_all reads it; the ip6 side stores it in ip6_reass_update.
   Confirm the ip4 value is set before this is merged.
 - Indentation inside the hunks was reconstructed from a whitespace-collapsed
   copy of this patch (spaces only). Re-check it against the tree, or apply
   with `git apply --ignore-whitespace`.

 src/vnet/buffer.h            |   1 +
 src/vnet/ip/ip4_reassembly.c |  70 ++++++++++++++++++++-----
 src/vnet/ip/ip6_reassembly.c | 123 +++++++++++++++++++++++++++++++------------
 3 files changed, 146 insertions(+), 48 deletions(-)

diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 2144ed3a9df..5114dc68a71 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -189,6 +189,7 @@ typedef struct
           struct
           {
             u32 next_index; /* index of next node - ignored if "feature" node */
+            u32 error_next_index; /* index of next node if error - ignored if 'feature' node */
             u16 estimated_mtu; /* estimated MTU calculated during reassembly */
             u16 owner_thread_index;
           };
diff --git a/src/vnet/ip/ip4_reassembly.c b/src/vnet/ip/ip4_reassembly.c
index 763229c5baa..73a83a9a632 100644
--- a/src/vnet/ip/ip4_reassembly.c
+++ b/src/vnet/ip/ip4_reassembly.c
@@ -132,7 +132,11 @@ typedef struct
   // trace operation counter
   u32 trace_op_counter;
   // next index - used by non-feature node
-  u8 next_index;
+  u32 next_index;
+  // error next index - used by non-feature node
+  u32 error_next_index;
+  // is_feature flag stored for non-inline code use
+  bool is_feature;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
   // number of fragments in this reassembly
@@ -332,8 +336,9 @@ ip4_reass_free (ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
 }
 
 always_inline void
-ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
-                      ip4_reass_t * reass)
+ip4_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+                    ip4_reass_main_t * rm, ip4_reass_t * reass,
+                    bool is_feature)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -360,14 +365,47 @@ ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
         }
       range_bi = range_vnb->ip.reass.next_range_bi;
     }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
+  /* send to next_error_index */
+  if (!(is_feature))
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+        {
+          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+          while (vec_len (to_free) > 0 && n_left_to_next > 0)
+            {
+              bi = vec_pop (to_free);
+
+              if (~0 != bi)
+                {
+                  to_next[0] = bi;
+                  to_next += 1;
+                  n_left_to_next -= 1;
+                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                   to_next, n_left_to_next,
+                                                   bi, next_index);
+                }
+            }
+          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+        }
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
   vec_free (to_free);
 }
 
 static ip4_reass_t *
-ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
-                          ip4_reass_per_thread_t * rt, ip4_reass_kv_t * kv,
-                          u8 * do_handoff)
+ip4_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
+                          ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
+                          ip4_reass_kv_t * kv, u8 * do_handoff,
+                          bool is_feature)
 {
   ip4_reass_t *reass = NULL;
   f64 now = vlib_time_now (rm->vlib_main);
@@ -384,7 +422,7 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
 
       if (now > reass->last_heard + rm->timeout)
         {
-          ip4_reass_on_timeout (vm, rm, reass);
+          ip4_reass_drop_all (vm, node, rm, reass, is_feature);
           ip4_reass_free (rm, rt, reass);
           reass = NULL;
         }
@@ -410,6 +448,7 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->is_feature = is_feature;
       ++rt->reass_n;
     }
 
@@ -1020,7 +1059,8 @@ ip4_reassembly_inline (vlib_main_t * vm,
                     (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
 
                   ip4_reass_t *reass =
-                    ip4_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
+                    ip4_reass_find_or_create (vm, node, rm, rt, &kv,
+                                              &do_handoff, is_feature);
 
                   if (PREDICT_FALSE (do_handoff))
                     {
@@ -1046,7 +1086,8 @@ ip4_reassembly_inline (vlib_main_t * vm,
                           vlib_node_increment_counter (vm, node->node_index,
                                                        IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
                                                        1);
-                          ip4_reass_on_timeout (vm, rm, reass);
+                          ip4_reass_drop_all (vm, node, rm, reass,
+                                              is_feature);
                           ip4_reass_free (rm, rt, reass);
                           goto next_packet;
                           break;
@@ -1054,15 +1095,18 @@ ip4_reassembly_inline (vlib_main_t * vm,
                           vlib_node_increment_counter (vm, node->node_index,
                                                        IP4_ERROR_REASS_NO_BUF,
                                                        1);
-                          ip4_reass_on_timeout (vm, rm, reass);
+                          ip4_reass_drop_all (vm, node, rm, reass,
+                                              is_feature);
                           ip4_reass_free (rm, rt, reass);
                           goto next_packet;
                           break;
                         case IP4_REASS_RC_INTERNAL_ERROR:
+                          /* drop everything and start with a clean slate */
                           vlib_node_increment_counter (vm, node->node_index,
                                                        IP4_ERROR_REASS_INTERNAL_ERROR,
                                                        1);
-                          ip4_reass_on_timeout (vm, rm, reass);
+                          ip4_reass_drop_all (vm, node, rm, reass,
+                                              is_feature);
                           ip4_reass_free (rm, rt, reass);
                           goto next_packet;
                           break;
@@ -1369,7 +1413,7 @@ ip4_reass_walk_expired (vlib_main_t * vm,
       vec_foreach (i, pool_indexes_to_free)
       {
         ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
-        ip4_reass_on_timeout (vm, rm, reass);
+        ip4_reass_drop_all (vm, node, rm, reass, reass->is_feature);
         ip4_reass_free (rm, rt, reass);
       }
       /* *INDENT-ON* */
diff --git a/src/vnet/ip/ip6_reassembly.c b/src/vnet/ip/ip6_reassembly.c
index cb1cd9afeb4..01f76aa1f0e 100644
--- a/src/vnet/ip/ip6_reassembly.c
+++ b/src/vnet/ip/ip6_reassembly.c
@@ -111,7 +111,11 @@ typedef struct
   // trace operation counter
   u32 trace_op_counter;
   // next index - used by non-feature node
-  u8 next_index;
+  u32 next_index;
+  // error next index - used by non-feature node
+  u32 error_next_index;
+  // is_feature flag stored for non-inline code use
+  bool is_feature;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
   // number of fragments for this reassembly
@@ -321,8 +325,9 @@ ip6_reass_free (ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
 }
 
 always_inline void
-ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
-                    ip6_reass_t * reass)
+ip6_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+                    ip6_reass_main_t * rm, ip6_reass_t * reass,
+                    bool is_feature)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -349,50 +354,86 @@ ip6_reass_drop_all (vlib_main_t * vm, ip6_reass_main_t * rm,
         }
       range_bi = range_vnb->ip.reass.next_range_bi;
     }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
+  /* send to next_error_index */
+  if (!(is_feature))
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+        {
+          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+          while (vec_len (to_free) > 0 && n_left_to_next > 0)
+            {
+              bi = vec_pop (to_free);
+
+              if (~0 != bi)
+                {
+                  to_next[0] = bi;
+                  to_next += 1;
+                  n_left_to_next -= 1;
+                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                   to_next, n_left_to_next,
+                                                   bi, next_index);
+                }
+            }
+          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+        }
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
   vec_free (to_free);
 }
 
 always_inline void
 ip6_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
                       ip6_reass_main_t * rm, ip6_reass_t * reass,
-                      u32 * icmp_bi)
+                      u32 * icmp_bi, bool is_feature)
 {
   if (~0 == reass->first_bi)
     {
       return;
     }
-  vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
-  if (0 == vnet_buffer (b)->ip.reass.fragment_first)
+  if (is_feature)
     {
-      *icmp_bi = reass->first_bi;
-      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
-        {
-          ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
-                               ICMP_ERROR_RT_EXCEEDED, 0);
-        }
-      // fragment with offset zero received - send icmp message back
-      if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+      vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi);
+      if (0 == vnet_buffer (b)->ip.reass.fragment_first)
         {
-          // separate first buffer from chain and steer it towards icmp node
-          b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
-          reass->first_bi = b->next_buffer;
-        }
-      else
-        {
-          reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+          *icmp_bi = reass->first_bi;
+          if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              ip6_reass_add_trace (vm, node, rm, reass, reass->first_bi,
+                                   ICMP_ERROR_RT_EXCEEDED, 0);
+            }
+          // fragment with offset zero received - send icmp message back
+          if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+            {
+              // separate first buffer from chain and steer it towards icmp node
+              b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+              reass->first_bi = b->next_buffer;
+            }
+          else
+            {
+              reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi;
+            }
+          icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
+                                       ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
+                                       0);
         }
-      icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded,
-                                   ICMP6_time_exceeded_fragment_reassembly_time_exceeded,
-                                   0);
     }
-  ip6_reass_drop_all (vm, rm, reass);
+  ip6_reass_drop_all (vm, node, rm, reass, is_feature);
 }
 
 always_inline ip6_reass_t *
 ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
                           ip6_reass_main_t * rm, ip6_reass_per_thread_t * rt,
-                          ip6_reass_kv_t * kv, u32 * icmp_bi, u8 * do_handoff)
+                          ip6_reass_kv_t * kv, u32 * icmp_bi, u8 * do_handoff,
+                          bool is_feature)
 {
   ip6_reass_t *reass = NULL;
   f64 now = vlib_time_now (rm->vlib_main);
@@ -409,7 +450,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
 
       if (now > reass->last_heard + rm->timeout)
         {
-          ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi);
+          ip6_reass_on_timeout (vm, node, rm, reass, icmp_bi, is_feature);
           ip6_reass_free (rm, rt, reass);
           reass = NULL;
         }
@@ -435,6 +476,7 @@ ip6_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->is_feature = is_feature;
       ++rt->reass_n;
     }
 
@@ -718,6 +760,8 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
   reass->next_index = fvnb->ip.reass.next_index; // store next_index before it's overwritten
+  reass->error_next_index = fvnb->ip.reass.error_next_index; // store error_next_index before it is overwritten
+
   fvnb->ip.reass.ip6_frag_hdr_offset =
     (u8 *) frag_hdr - (u8 *) vlib_buffer_get_current (fb);
   ip6_header_t *fip = vlib_buffer_get_current (fb);
@@ -793,7 +837,7 @@ ip6_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
           else
             {
               // overlapping fragment - not allowed by RFC 8200
-              ip6_reass_drop_all (vm, rm, reass);
+              ip6_reass_drop_all (vm, node, rm, reass, is_feature);
               ip6_reass_free (rm, rt, reass);
               if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
                 {
@@ -983,7 +1027,7 @@ ip6_reassembly_inline (vlib_main_t * vm,
 
               ip6_reass_t *reass =
                 ip6_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
-                                          &do_handoff);
+                                          &do_handoff, is_feature);
 
               if (PREDICT_FALSE (do_handoff))
                 {
@@ -1007,22 +1051,23 @@ ip6_reassembly_inline (vlib_main_t * vm,
                       vlib_node_increment_counter (vm, node->node_index,
                                                    IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
                                                    1);
-                      ip6_reass_drop_all (vm, rm, reass);
+                      ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                       ip6_reass_free (rm, rt, reass);
                       goto next_packet;
                       break;
                     case IP6_REASS_RC_NO_BUF:
                       vlib_node_increment_counter (vm, node->node_index,
                                                    IP6_ERROR_REASS_NO_BUF, 1);
-                      ip6_reass_drop_all (vm, rm, reass);
+                      ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                       ip6_reass_free (rm, rt, reass);
                       goto next_packet;
                       break;
                     case IP6_REASS_RC_INTERNAL_ERROR:
+                      /* drop everything and start with a clean slate */
                       vlib_node_increment_counter (vm, node->node_index,
                                                    IP6_ERROR_REASS_INTERNAL_ERROR,
                                                    1);
-                      ip6_reass_drop_all (vm, rm, reass);
+                      ip6_reass_drop_all (vm, node, rm, reass, is_feature);
                       ip6_reass_free (rm, rt, reass);
                       goto next_packet;
                       break;
@@ -1030,7 +1075,15 @@ ip6_reassembly_inline (vlib_main_t * vm,
                 }
               else
                 {
-                  next0 = IP6_REASSEMBLY_NEXT_DROP;
+                  if (is_feature)
+                    {
+                      next0 = IP6_REASSEMBLY_NEXT_DROP;
+                    }
+                  else
+                    {
+                      vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+                      next0 = fvnb->ip.reass.error_next_index;
+                    }
                   error0 = IP6_ERROR_REASS_LIMIT_REACHED;
                 }
 
@@ -1354,7 +1407,7 @@ ip6_reass_walk_expired (vlib_main_t * vm,
               b->flags &= ~VLIB_BUFFER_IS_TRACED;
             }
           }
-        ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi);
+        ip6_reass_on_timeout (vm, node, rm, reass, &icmp_bi, reass->is_feature);
         if (~0 != icmp_bi)
           {
             vec_add1 (vec_icmp_bi, icmp_bi);
-- 
2.16.6