diff --git a/src/vnet/ip/ip4_reassembly.c b/src/vnet/ip/ip4_reassembly.c
index 86e5e3936cb..682cad965af 100644
--- a/src/vnet/ip/ip4_reassembly.c
+++ b/src/vnet/ip/ip4_reassembly.c
@@ -25,11 +25,13 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MSEC_PER_SEC 1000
 #define IP4_REASS_TIMEOUT_DEFAULT_MS 100
 #define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
 #define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
+#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
 #define IP4_REASS_HT_LOAD_FACTOR (0.75)
 
 #define IP4_REASS_DEBUG_BUFFERS 0
@@ -57,8 +59,10 @@
 typedef enum
 {
   IP4_REASS_RC_OK,
+  IP4_REASS_RC_TOO_MANY_FRAGMENTS,
   IP4_REASS_RC_INTERNAL_ERROR,
   IP4_REASS_RC_NO_BUF,
+  IP4_REASS_RC_HANDOFF,
 } ip4_reass_rc_t;
 
 typedef struct
@@ -83,7 +87,7 @@ typedef union
     struct
     {
       u32 reass_index;
-      u32 thread_index;
+      u32 memory_owner_thread_index;
    };
    u64 as_u64;
  } ip4_reass_val_t;
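For orientation, the value union above is what a bihash lookup yields for a fragment's flow key. A minimal sketch of how it is consumed (illustrative only, not part of the patch; ip4_reass_kv_t and the 16_8 hash are the ones defined in this file, the helper name is hypothetical):

    static inline u32
    ip4_reass_lookup_owner (clib_bihash_16_8_t * hash, ip4_reass_kv_t * kv)
    {
      /* on a hit the value half of kv is overwritten in place with
       * { reass_index, memory_owner_thread_index } */
      if (!clib_bihash_search_16_8 (hash, (clib_bihash_kv_16_8_t *) kv,
                                    (clib_bihash_kv_16_8_t *) kv))
        return kv->v.memory_owner_thread_index;
      return ~0;
    }

Renaming thread_index to memory_owner_thread_index matters because, after this patch, the thread that owns the context's pool memory and the thread that sends out the finished packet can differ.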
@@ -130,14 +134,23 @@ typedef struct
   // trace operation counter
   u32 trace_op_counter;
   // next index - used by non-feature node
-  u8 next_index;
+  u32 next_index;
+  // error next index - used by custom apps (~0 if not used)
+  u32 error_next_index;
   // minimum fragment length for this reassembly - used to estimate MTU
   u16 min_fragment_length;
-
+  // number of fragments in this reassembly
+  u32 fragments_n;
+  // thread owning memory for this context (whose pool contains this ctx)
+  u32 memory_owner_thread_index;
+  // thread which received fragment with offset 0 and which sends out the
+  // completed reassembly
+  u32 sendout_thread_index;
 } ip4_reass_t;
 
 typedef struct
 {
+  // pool of reassembly contexts
   ip4_reass_t *pool;
   u32 reass_n;
   u32 id_counter;
@@ -150,6 +163,9 @@ typedef struct
   u32 timeout_ms;
   f64 timeout;
   u32 expire_walk_interval_ms;
+  // maximum number of fragments in one reassembly
+  u32 max_reass_len;
+  // maximum number of reassemblies
   u32 max_reass_n;
 
   // IPv4 runtime
@@ -159,7 +175,6 @@ typedef struct
 
   // convenience
   vlib_main_t *vlib_main;
-  vnet_main_t *vnet_main;
 
   // node index of ip4-drop node
   u32 ip4_drop_idx;
@@ -168,10 +183,13 @@ typedef struct
   /** Worker handoff */
   u32 fq_index;
   u32 fq_feature_index;
-
 } ip4_reass_main_t;
 
+extern ip4_reass_main_t ip4_reass_main;
+
+#ifndef CLIB_MARCH_VARIANT
 ip4_reass_main_t ip4_reass_main;
+#endif /* CLIB_MARCH_VARIANT */
 
 typedef enum
 {
@@ -188,6 +206,7 @@ typedef enum
   RANGE_DISCARD,
   RANGE_OVERLAP,
   FINALIZE,
+  HANDOFF,
 } ip4_reass_trace_operation_e;
 
 typedef struct
@@ -207,11 +226,16 @@ typedef struct
   ip4_reass_range_trace_t trace_range;
   u32 size_diff;
   u32 op_id;
+  u32 thread_id;
+  u32 thread_id_to;
   u32 fragment_first;
   u32 fragment_last;
   u32 total_data_len;
 } ip4_reass_trace_t;
 
+extern vlib_node_registration_t ip4_reass_node;
+extern vlib_node_registration_t ip4_reass_node_feature;
+
 static void
 ip4_reass_trace_details (vlib_main_t * vm, u32 bi,
			 ip4_reass_range_trace_t * trace)
 {
@@ -235,17 +259,23 @@ format_ip4_reass_range_trace (u8 * s, va_list * args)
   return s;
 }
 
-u8 *
+static u8 *
 format_ip4_reass_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip4_reass_trace_t *t = va_arg (*args, ip4_reass_trace_t *);
-  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
-  u32 indent = format_get_indent (s);
-  s = format (s, "first bi: %u, data len: %u, ip/fragment[%u, %u]",
-	      t->trace_range.first_bi, t->total_data_len, t->fragment_first,
-	      t->fragment_last);
+  u32 indent = 0;
+  if (~0 != t->reass_id)
+    {
+      s = format (s, "reass id: %u, op id: %u, ", t->reass_id, t->op_id);
+      indent = format_get_indent (s);
+      s =
+	format (s,
+		"first bi: %u, data len: %u, ip/fragment[%u, %u]",
+		t->trace_range.first_bi, t->total_data_len, t->fragment_first,
+		t->fragment_last);
+    }
   switch (t->action)
     {
     case RANGE_SHRINK:
@@ -268,34 +298,36 @@ format_ip4_reass_trace (u8 * s, va_list * args)
     case FINALIZE:
       s = format (s, "\n%Ufinalize reassembly", format_white_space, indent);
       break;
+    case HANDOFF:
+      s =
+	format (s, "handoff from thread #%u to thread #%u", t->thread_id,
+		t->thread_id_to);
+      break;
     }
   return s;
 }
 
 static void
 ip4_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
-		     ip4_reass_main_t * rm, ip4_reass_t * reass, u32 bi,
-		     ip4_reass_trace_operation_e action, u32 size_diff)
+		     ip4_reass_main_t * rm, u32 reass_id, u32 op_id,
+		     u32 bi, u32 first_bi, u32 data_len,
+		     ip4_reass_trace_operation_e action, u32 size_diff,
+		     u32 thread_id_to)
 {
   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
   vnet_buffer_opaque_t *vnb = vnet_buffer (b);
-  if (pool_is_free_index (vm->trace_main.trace_buffer_pool, b->trace_index))
-    {
-      // this buffer's trace is gone
-      b->flags &= ~VLIB_BUFFER_IS_TRACED;
-      return;
-    }
   ip4_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
-  t->reass_id = reass->id;
+  t->reass_id = reass_id;
   t->action = action;
   ip4_reass_trace_details (vm, bi, &t->trace_range);
   t->size_diff = size_diff;
-  t->op_id = reass->trace_op_counter;
-  ++reass->trace_op_counter;
+  t->op_id = op_id;
+  t->thread_id = vm->thread_index;
+  t->thread_id_to = thread_id_to;
   t->fragment_first = vnb->ip.reass.fragment_first;
   t->fragment_last = vnb->ip.reass.fragment_last;
-  t->trace_range.first_bi = reass->first_bi;
-  t->total_data_len = reass->data_len;
+  t->trace_range.first_bi = first_bi;
+  t->total_data_len = data_len;
 #if 0
   static u8 *s = NULL;
   s = format (s, "%U", format_ip4_reass_trace, NULL, NULL, t);
@@ -305,22 +337,27 @@ ip4_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
 #endif
 }
 
+always_inline void
+ip4_reass_free_ctx (ip4_reass_per_thread_t * rt, ip4_reass_t * reass)
+{
+  pool_put (rt->pool, reass);
+  --rt->reass_n;
+}
 
 always_inline void
-ip4_reass_free (ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
-		ip4_reass_t * reass)
+ip4_reass_free (vlib_main_t * vm, ip4_reass_main_t * rm,
+		ip4_reass_per_thread_t * rt, ip4_reass_t * reass)
 {
   clib_bihash_kv_16_8_t kv;
   kv.key[0] = reass->key.as_u64[0];
   kv.key[1] = reass->key.as_u64[1];
   clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
-  pool_put (rt->pool, reass);
-  --rt->reass_n;
+  return ip4_reass_free_ctx (rt, reass);
 }
 
 always_inline void
-ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
-		      ip4_reass_t * reass)
+ip4_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
+		    ip4_reass_main_t * rm, ip4_reass_t * reass)
 {
   u32 range_bi = reass->first_bi;
   vlib_buffer_t *range_b;
@@ -347,32 +384,67 @@ ip4_reass_on_timeout (vlib_main_t * vm, ip4_reass_main_t * rm,
     }
     range_bi = range_vnb->ip.reass.next_range_bi;
   }
-  vlib_buffer_free (vm, to_free, vec_len (to_free));
-  vec_free (to_free);
+  /* send to error_next_index */
+  if (~0 != reass->error_next_index)
+    {
+      u32 n_left_to_next, *to_next, next_index;
+
+      next_index = reass->error_next_index;
+      u32 bi = ~0;
+
+      while (vec_len (to_free) > 0)
+	{
+	  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+	  while (vec_len (to_free) > 0 && n_left_to_next > 0)
+	    {
+	      bi = vec_pop (to_free);
+
+	      if (~0 != bi)
+		{
+		  to_next[0] = bi;
+		  to_next += 1;
+		  n_left_to_next -= 1;
+		}
+	    }
+	  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+	}
+    }
+  else
+    {
+      vlib_buffer_free (vm, to_free, vec_len (to_free));
+    }
 }
 
-ip4_reass_t *
-ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
-			  ip4_reass_per_thread_t * rt, ip4_reass_kv_t * kv,
-			  u8 * do_handoff)
+static ip4_reass_t *
+ip4_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
			  ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
+			  ip4_reass_kv_t * kv, u8 * do_handoff)
 {
-  ip4_reass_t *reass = NULL;
-  f64 now = vlib_time_now (rm->vlib_main);
+  ip4_reass_t *reass;
+  f64 now;
+
+again:
+
+  reass = NULL;
+  now = vlib_time_now (vm);
 
   if (!clib_bihash_search_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv,
				(clib_bihash_kv_16_8_t *) kv))
     {
-      if (vm->thread_index != kv->v.thread_index)
+      reass =
+	pool_elt_at_index (rm->per_thread_data
+			   [kv->v.memory_owner_thread_index].pool,
+			   kv->v.reass_index);
+      if (vm->thread_index != reass->memory_owner_thread_index)
	{
	  *do_handoff = 1;
-	  return NULL;
+	  return reass;
	}
-      reass = pool_elt_at_index (rt->pool, kv->v.reass_index);
 
       if (now > reass->last_heard + rm->timeout)
	{
-	  ip4_reass_on_timeout (vm, rm, reass);
-	  ip4_reass_free (rm, rt, reass);
+	  ip4_reass_drop_all (vm, node, rm, reass);
+	  ip4_reass_free (vm, rm, rt, reass);
	  reass = NULL;
	}
     }
@@ -393,23 +465,31 @@ ip4_reass_find_or_create (vlib_main_t * vm, ip4_reass_main_t * rm,
       pool_get (rt->pool, reass);
       clib_memset (reass, 0, sizeof (*reass));
       reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
+      reass->memory_owner_thread_index = vm->thread_index;
       ++rt->id_counter;
       reass->first_bi = ~0;
       reass->last_packet_octet = ~0;
       reass->data_len = 0;
+      reass->next_index = ~0;
+      reass->error_next_index = ~0;
       ++rt->reass_n;
     }
 
   reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
   reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
   kv->v.reass_index = (reass - rt->pool);
-  kv->v.thread_index = vm->thread_index;
+  kv->v.memory_owner_thread_index = vm->thread_index;
   reass->last_heard = now;
 
-  if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
+  int rv =
+    clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 2);
+  if (rv)
     {
-      ip4_reass_free (rm, rt, reass);
+      ip4_reass_free_ctx (rt, reass);
       reass = NULL;
+      // if other worker created a context already work with the other copy
+      if (-2 == rv)
+	goto again;
     }
 
   return reass;
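The retry above leans on the bihash's conditional-add mode: an is_add argument of 2 means add only if the key is not already present, and a -2 return signals that another worker inserted the same key first. In isolation the idiom is (sketch; rm and kv as in the surrounding function):

    int rv = clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv,
                                       2 /* add, but do not overwrite */);
    if (0 == rv)
      ;   /* entry added - this thread owns the new context */
    else if (-2 == rv)
      ;   /* lost the race - free the local context, redo the lookup */

This is what lets ip4_reass_find_or_create run concurrently on multiple workers without a lock around the lookup-plus-insert sequence.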
@@ -419,7 +499,7 @@ always_inline ip4_reass_rc_t
 ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
		    ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
		    ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
-		    bool is_feature)
+		    bool is_custom_app)
 {
   vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi);
   vlib_buffer_t *last_b = NULL;
@@ -468,7 +548,7 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
	  if (trim_front > tmp->current_length)
	    {
	      /* drop whole buffer */
-	      vlib_buffer_free_one (vm, tmp_bi);
+	      u32 to_be_freed_bi = tmp_bi;
	      trim_front -= tmp->current_length;
	      if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
		{
@@ -476,7 +556,9 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
		}
	      tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
	      tmp_bi = tmp->next_buffer;
+	      tmp->next_buffer = 0;
	      tmp = vlib_get_buffer (vm, tmp_bi);
+	      vlib_buffer_free_one (vm, to_be_freed_bi);
	      continue;
	    }
	  else
	    {
@@ -507,23 +589,37 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
		}
	    }
	  total_length += tmp->current_length;
+	  if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
+	    {
+	      tmp_bi = tmp->next_buffer;
+	      tmp = vlib_get_buffer (vm, tmp->next_buffer);
+	    }
+	  else
+	    {
+	      break;
+	    }
	}
       else
	{
-	  vlib_buffer_free_one (vm, tmp_bi);
+	  u32 to_be_freed_bi = tmp_bi;
	  if (reass->first_bi == tmp_bi)
	    {
	      return IP4_REASS_RC_INTERNAL_ERROR;
	    }
-	}
-      if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
-	{
-	  tmp_bi = tmp->next_buffer;
-	  tmp = vlib_get_buffer (vm, tmp->next_buffer);
-	}
-      else
-	{
-	  break;
+	  if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
+	    {
+	      tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+	      tmp_bi = tmp->next_buffer;
+	      tmp->next_buffer = 0;
+	      tmp = vlib_get_buffer (vm, tmp_bi);
+	      vlib_buffer_free_one (vm, to_be_freed_bi);
+	    }
+	  else
+	    {
+	      tmp->next_buffer = 0;
+	      vlib_buffer_free_one (vm, to_be_freed_bi);
+	      break;
+	    }
	}
     }
   sub_chain_bi =
@@ -537,6 +633,7 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
       return IP4_REASS_RC_INTERNAL_ERROR;
     }
   last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
   if (total_length < first_b->current_length)
     {
       return IP4_REASS_RC_INTERNAL_ERROR;
@@ -552,10 +649,14 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
     {
       return IP4_REASS_RC_NO_BUF;
     }
-
+  // reset to reconstruct the mbuf linking
+  first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
   if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
     {
-      ip4_reass_add_trace (vm, node, rm, reass, reass->first_bi, FINALIZE, 0);
+      ip4_reass_add_trace (vm, node, rm, reass->id, reass->trace_op_counter,
+			   reass->first_bi, reass->first_bi, reass->data_len,
+			   FINALIZE, 0, ~0);
+      ++reass->trace_op_counter;
 #if 0
       // following code does a hexdump of packet fragments to stdout ...
      do
@@ -584,7 +685,7 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
 #endif
     }
   *bi0 = reass->first_bi;
-  if (is_feature)
+  if (!is_custom_app)
     {
       *next0 = IP4_REASSEMBLY_NEXT_INPUT;
     }
@@ -594,7 +695,7 @@ ip4_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
     }
   vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
   *error0 = IP4_ERROR_NONE;
-  ip4_reass_free (rm, rt, reass);
+  ip4_reass_free (vm, rm, rt, reass);
   reass = NULL;
   return IP4_REASS_RC_OK;
 }
@@ -665,20 +766,27 @@ ip4_reass_remove_range_from_chain (vlib_main_t * vm,
       reass->data_len -= ip4_reass_buffer_get_data_len (discard_b);
   while (1)
     {
-      vlib_buffer_free_one (vm, discard_bi);
+      u32 to_be_freed_bi = discard_bi;
       if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
	{
-	  ip4_reass_add_trace (vm, node, rm, reass, discard_bi, RANGE_DISCARD,
-			       0);
+	  ip4_reass_add_trace (vm, node, rm, reass->id,
+			       reass->trace_op_counter, discard_bi,
+			       reass->first_bi, reass->data_len,
+			       RANGE_DISCARD, 0, ~0);
+	  ++reass->trace_op_counter;
	}
       if (discard_b->flags & VLIB_BUFFER_NEXT_PRESENT)
	{
	  discard_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
	  discard_bi = discard_b->next_buffer;
+	  discard_b->next_buffer = 0;
	  discard_b = vlib_get_buffer (vm, discard_bi);
+	  vlib_buffer_free_one (vm, to_be_freed_bi);
	}
       else
	{
+	  discard_b->next_buffer = 0;
+	  vlib_buffer_free_one (vm, to_be_freed_bi);
	  break;
	}
     }
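Both loops above, like the trim loop in ip4_reass_finalize, replace an eager vlib_buffer_free_one with the same defensive sequence: remember the index, unlink the buffer from its chain, advance, and only then free, so next_buffer is never read from an already-freed buffer. Stripped of the reassembly specifics the pattern is (sketch; cur_bi stands for any chain head and is not a name used in this file):

    while (1)
      {
        u32 to_be_freed_bi = cur_bi;
        vlib_buffer_t *cur_b = vlib_get_buffer (vm, cur_bi);
        if (cur_b->flags & VLIB_BUFFER_NEXT_PRESENT)
          {
            cur_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
            cur_bi = cur_b->next_buffer;
            cur_b->next_buffer = 0;
            vlib_buffer_free_one (vm, to_be_freed_bi); /* already advanced */
          }
        else
          {
            cur_b->next_buffer = 0;
            vlib_buffer_free_one (vm, to_be_freed_bi);
            break;
          }
      }

Clearing next_buffer before the free also keeps a recycled buffer from carrying a stale chain pointer.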
@@ -689,14 +797,19 @@ always_inline ip4_reass_rc_t
 ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		  ip4_reass_main_t * rm, ip4_reass_per_thread_t * rt,
		  ip4_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0,
-		  bool is_feature)
+		  bool is_custom_app, u32 * handoff_thread_idx)
 {
   ip4_reass_rc_t rc = IP4_REASS_RC_OK;
   int consumed = 0;
   vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
   ip4_header_t *fip = vlib_buffer_get_current (fb);
   vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
-  reass->next_index = fvnb->ip.reass.next_index;	// store next_index before it's overwritten
+  if (is_custom_app)
+    {
+      // store (error_)next_index before it's overwritten
+      reass->next_index = fvnb->ip.reass.next_index;
+      reass->error_next_index = fvnb->ip.reass.error_next_index;
+    }
   const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
   const u32 fragment_length =
     clib_net_to_host_u16 (fip->length) - ip4_header_bytes (fip);
@@ -725,10 +838,14 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
     }
   if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
     {
-      ip4_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0);
+      ip4_reass_add_trace (vm, node, rm, reass->id,
+			   reass->trace_op_counter, *bi0, reass->first_bi,
+			   reass->data_len, RANGE_NEW, 0, ~0);
+      ++reass->trace_op_counter;
     }
   *bi0 = ~0;
   reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
+  reass->fragments_n = 1;
   return IP4_REASS_RC_OK;
 }
   reass->min_fragment_length = clib_min (clib_net_to_host_u16 (fip->length),
@@ -778,8 +895,11 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
	      // this fragment is a (sub)part of existing range, ignore it
	      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
		{
-		  ip4_reass_add_trace (vm, node, rm, reass, *bi0,
-				       RANGE_OVERLAP, 0);
+		  ip4_reass_add_trace (vm, node, rm, reass->id,
+				       reass->trace_op_counter, *bi0,
+				       reass->first_bi, reass->data_len,
+				       RANGE_OVERLAP, 0, ~0);
+		  ++reass->trace_op_counter;
		}
	      break;
	    }
@@ -798,9 +918,12 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		  reass->data_len -= overlap;
		  if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
		    {
-		      ip4_reass_add_trace (vm, node, rm, reass,
-					   candidate_range_bi, RANGE_SHRINK,
-					   overlap);
+		      ip4_reass_add_trace (vm, node, rm, reass->id,
+					   reass->trace_op_counter,
+					   candidate_range_bi,
+					   reass->first_bi, reass->data_len,
+					   RANGE_SHRINK, 0, ~0);
+		      ++reass->trace_op_counter;
		    }
		  rc =
		    ip4_reass_insert_range_in_chain (vm, rm, rt, reass,
@@ -886,24 +1009,39 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
	}
       break;
     }
+  ++reass->fragments_n;
   if (consumed)
     {
       if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
	{
-	  ip4_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0);
+	  ip4_reass_add_trace (vm, node, rm, reass->id,
+			       reass->trace_op_counter, *bi0, reass->first_bi,
+			       reass->data_len, RANGE_NEW, 0, ~0);
+	  ++reass->trace_op_counter;
	}
     }
   if (~0 != reass->last_packet_octet &&
       reass->data_len == reass->last_packet_octet + 1)
     {
-      return ip4_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
-				 is_feature);
+      *handoff_thread_idx = reass->sendout_thread_index;
+      rc =
+	ip4_reass_finalize (vm, node, rm, rt, reass, bi0, next0, error0,
+			    is_custom_app);
+      if (IP4_REASS_RC_OK == rc
+	  && reass->memory_owner_thread_index != reass->sendout_thread_index)
+	{
+	  rc = IP4_REASS_RC_HANDOFF;
+	}
     }
   else
     {
       if (consumed)
	{
	  *bi0 = ~0;
+	  if (reass->fragments_n > rm->max_reass_len)
+	    {
+	      rc = IP4_REASS_RC_TOO_MANY_FRAGMENTS;
+	    }
	}
       else
	{
@@ -915,9 +1053,9 @@ ip4_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 
 always_inline uword
-ip4_reassembly_inline (vlib_main_t * vm,
-		       vlib_node_runtime_t * node,
-		       vlib_frame_t * frame, bool is_feature)
+ip4_reassembly_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+		       vlib_frame_t * frame, bool is_feature,
+		       bool is_custom_app)
 {
   u32 *from = vlib_frame_vector_args (frame);
   u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -945,7 +1083,7 @@ ip4_reassembly_inline (vlib_main_t * vm,
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	    {
	      // this is a whole packet - no fragmentation
-	      if (is_feature)
+	      if (!is_custom_app)
		{
		  next0 = IP4_REASSEMBLY_NEXT_INPUT;
		}
@@ -979,34 +1117,72 @@ ip4_reassembly_inline (vlib_main_t * vm,
		(u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
 
	      ip4_reass_t *reass =
-		ip4_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
-
+		ip4_reass_find_or_create (vm, node, rm, rt, &kv,
+					  &do_handoff);
+	      if (reass)
+		{
+		  const u32 fragment_first =
+		    ip4_get_fragment_offset_bytes (ip0);
+		  if (0 == fragment_first)
+		    {
+		      reass->sendout_thread_index = vm->thread_index;
+		    }
+		}
	      if (PREDICT_FALSE (do_handoff))
		{
		  next0 = IP4_REASSEMBLY_NEXT_HANDOFF;
		  if (is_feature)
		    vnet_buffer (b0)->ip.
		      reass.owner_feature_thread_index =
-		      kv.v.thread_index;
+		      kv.v.memory_owner_thread_index;
		  else
		    vnet_buffer (b0)->ip.reass.owner_thread_index =
-		      kv.v.thread_index;
+		      kv.v.memory_owner_thread_index;
		}
	      else if (reass)
		{
+		  u32 handoff_thread_idx;
		  switch (ip4_reass_update
			  (vm, node, rm, rt, reass, &bi0, &next0,
-			   &error0, is_feature))
+			   &error0, is_custom_app, &handoff_thread_idx))
		    {
		    case IP4_REASS_RC_OK:
		      /* nothing to do here */
		      break;
+		    case IP4_REASS_RC_HANDOFF:
+		      next0 = IP4_REASSEMBLY_NEXT_HANDOFF;
+		      b0 = vlib_get_buffer (vm, bi0);
+		      if (is_feature)
+			vnet_buffer (b0)->ip.
+			  reass.owner_feature_thread_index =
+			  handoff_thread_idx;
+		      else
+			vnet_buffer (b0)->ip.reass.owner_thread_index =
+			  handoff_thread_idx;
+		      break;
+		    case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+		      vlib_node_increment_counter (vm, node->node_index,
+						   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
+						   1);
+		      ip4_reass_drop_all (vm, node, rm, reass);
+		      ip4_reass_free (vm, rm, rt, reass);
+		      goto next_packet;
+		      break;
		    case IP4_REASS_RC_NO_BUF:
-		      /* fallthrough */
+		      vlib_node_increment_counter (vm, node->node_index,
+						   IP4_ERROR_REASS_NO_BUF,
+						   1);
+		      ip4_reass_drop_all (vm, node, rm, reass);
+		      ip4_reass_free (vm, rm, rt, reass);
+		      goto next_packet;
+		      break;
		    case IP4_REASS_RC_INTERNAL_ERROR:
		      /* drop everything and start with a clean slate */
-		      ip4_reass_on_timeout (vm, rm, reass);
-		      ip4_reass_free (rm, rt, reass);
+		      vlib_node_increment_counter (vm, node->node_index,
+						   IP4_ERROR_REASS_INTERNAL_ERROR,
+						   1);
+		      ip4_reass_drop_all (vm, node, rm, reass);
+		      ip4_reass_free (vm, rm, rt, reass);
		      goto next_packet;
		      break;
		    }
		}
@@ -1026,7 +1202,24 @@ ip4_reassembly_inline (vlib_main_t * vm,
	      to_next[0] = bi0;
	      to_next += 1;
	      n_left_to_next -= 1;
-	      if (is_feature && IP4_ERROR_NONE == error0)
+	      if (next0 == IP4_REASSEMBLY_NEXT_HANDOFF)
+		{
+		  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		    {
+		      if (is_feature)
+			ip4_reass_add_trace (vm, node, rm, ~0,
+					     ~0,
+					     bi0, ~0, ~0, HANDOFF, 0,
+					     vnet_buffer (b0)->ip.
+					     reass.owner_feature_thread_index);
+		      else
+			ip4_reass_add_trace (vm, node, rm, ~0, ~0, bi0,
+					     ~0, ~0, HANDOFF, 0,
+					     vnet_buffer (b0)->ip.
+					     reass.owner_thread_index);
+		    }
+		}
+	      else if (is_feature && IP4_ERROR_NONE == error0)
		{
		  b0 = vlib_get_buffer (vm, bi0);
		  vnet_feature_next (&next0, b0);
@@ -1055,16 +1248,15 @@ static char *ip4_reassembly_error_strings[] = {
 #undef _
 };
 
-static uword
-ip4_reassembly (vlib_main_t * vm, vlib_node_runtime_t * node,
-		vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+			       vlib_frame_t * frame)
 {
-  return ip4_reassembly_inline (vm, node, frame, false /* is_feature */ );
+  return ip4_reassembly_inline (vm, node, frame, false /* is_feature */ ,
+				false /* is_custom_app */ );
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_reass_node, static) = {
-    .function = ip4_reassembly,
+VLIB_REGISTER_NODE (ip4_reass_node) = {
    .name = "ip4-reassembly",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_reass_trace,
@@ -1081,18 +1273,16 @@ VLIB_REGISTER_NODE (ip4_reass_node, static) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_reass_node, ip4_reassembly);
-
-static uword
-ip4_reassembly_feature (vlib_main_t * vm,
-			vlib_node_runtime_t * node, vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_reass_node_feature) (vlib_main_t * vm,
+				       vlib_node_runtime_t * node,
+				       vlib_frame_t * frame)
 {
-  return ip4_reassembly_inline (vm, node, frame, true /* is_feature */ );
+  return ip4_reassembly_inline (vm, node, frame, true /* is_feature */ ,
+				false /* is_custom_app */ );
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_reass_node_feature, static) = {
-    .function = ip4_reassembly_feature,
+VLIB_REGISTER_NODE (ip4_reass_node_feature) = {
    .name = "ip4-reassembly-feature",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_reass_trace,
@@ -1108,17 +1298,17 @@ VLIB_REGISTER_NODE (ip4_reass_node_feature, static) = {
 };
 /* *INDENT-ON* */
 
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_reass_node_feature, ip4_reassembly_feature);
-
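The two VLIB_NODE_FUNCTION_MULTIARCH invocations removed above become redundant with VLIB_NODE_FN: the macro generates the per-CPU-architecture variants and their dispatch automatically, while the CLIB_MARCH_VARIANT guards added elsewhere in this patch ensure globals and registrations are compiled only once. A minimal node in the new style would look roughly like this (sketch; the example names are hypothetical):

    VLIB_NODE_FN (example_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                                 vlib_frame_t * frame)
    {
      return frame->n_vectors;   /* placeholder: process nothing */
    }

    VLIB_REGISTER_NODE (example_node) = {
      .name = "example-node",
      .vector_size = sizeof (u32),
    };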
 /* *INDENT-OFF* */
 VNET_FEATURE_INIT (ip4_reassembly_feature, static) = {
    .arc_name = "ip4-unicast",
    .node_name = "ip4-reassembly-feature",
-    .runs_before = VNET_FEATURES ("ip4-lookup"),
+    .runs_before = VNET_FEATURES ("ip4-lookup",
+				  "ipsec4-input-feature"),
    .runs_after = 0,
 };
 /* *INDENT-ON* */
 
+#ifndef CLIB_MARCH_VARIANT
 always_inline u32
 ip4_reass_get_nbuckets ()
 {
@@ -1135,6 +1325,7 @@ ip4_reass_get_nbuckets ()
 
   return nbuckets;
 }
+#endif /* CLIB_MARCH_VARIANT */
 
 typedef enum
 {
@@ -1147,6 +1338,7 @@ typedef struct
   clib_bihash_16_8_t *new_hash;
 } ip4_rehash_cb_ctx;
 
+#ifndef CLIB_MARCH_VARIANT
 static void
 ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
 {
@@ -1159,20 +1351,21 @@ ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
 
 static void
 ip4_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
-		      u32 expire_walk_interval_ms)
+		      u32 max_reassembly_length, u32 expire_walk_interval_ms)
 {
   ip4_reass_main.timeout_ms = timeout_ms;
   ip4_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
   ip4_reass_main.max_reass_n = max_reassemblies;
+  ip4_reass_main.max_reass_len = max_reassembly_length;
   ip4_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
 }
 
 vnet_api_error_t
 ip4_reass_set (u32 timeout_ms, u32 max_reassemblies,
-	       u32 expire_walk_interval_ms)
+	       u32 max_reassembly_length, u32 expire_walk_interval_ms)
 {
   u32 old_nbuckets = ip4_reass_get_nbuckets ();
-  ip4_reass_set_params (timeout_ms, max_reassemblies,
+  ip4_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length,
			expire_walk_interval_ms);
   vlib_process_signal_event (ip4_reass_main.vlib_main,
			     ip4_reass_main.ip4_reass_expire_node_idx,
@@ -1199,6 +1392,7 @@ ip4_reass_set (u32 timeout_ms, u32 max_reassemblies,
	  clib_bihash_free_16_8 (&ip4_reass_main.hash);
	  clib_memcpy_fast (&ip4_reass_main.hash, &new_hash,
			    sizeof (ip4_reass_main.hash));
+	  clib_bihash_copied (&ip4_reass_main.hash, &new_hash);
	}
     }
   return 0;
@@ -1206,10 +1400,11 @@ ip4_reass_set (u32 timeout_ms, u32 max_reassemblies,
 
 vnet_api_error_t
 ip4_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
-	       u32 * expire_walk_interval_ms)
+	       u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
 {
   *timeout_ms = ip4_reass_main.timeout_ms;
   *max_reassemblies = ip4_reass_main.max_reass_n;
+  *max_reassembly_length = ip4_reass_main.max_reass_len;
   *expire_walk_interval_ms = ip4_reass_main.expire_walk_interval_ms;
   return 0;
 }
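With the new knob in place, a control-plane caller configures and reads back the limits like this (usage sketch; the values are arbitrary examples, the signatures are the ones declared above):

    u32 timeout_ms, max_reass, max_len, walk_ms;

    /* 200 ms timeout, 2048 concurrent reassemblies, at most 5 fragments
     * per packet, expiry walk every 10 s */
    if (0 == ip4_reass_set (200, 2048, 5, 10000))
      ip4_reass_get (&timeout_ms, &max_reass, &max_len, &walk_ms);

Note that ip4_reass_set may also grow the hash: if the new maximum pushes the bucket count up, the table is rebuilt via ip4_rehash_cb and swapped in.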
@@ -1223,7 +1418,6 @@ ip4_reass_init_function (vlib_main_t * vm)
   vlib_node_t *node;
 
   rm->vlib_main = vm;
-  rm->vnet_main = vnet_get_main ();
 
   vec_validate (rm->per_thread_data, vlib_num_workers ());
   ip4_reass_per_thread_t *rt;
@@ -1239,6 +1433,7 @@ ip4_reass_init_function (vlib_main_t * vm)
 
   ip4_reass_set_params (IP4_REASS_TIMEOUT_DEFAULT_MS,
			IP4_REASS_MAX_REASSEMBLIES_DEFAULT,
+			IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
			IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
 
   nbuckets = ip4_reass_get_nbuckets ();
@@ -1252,11 +1447,11 @@ ip4_reass_init_function (vlib_main_t * vm)
   rm->fq_feature_index =
     vlib_frame_queue_main_init (ip4_reass_node_feature.index, 0);
 
-
   return error;
 }
 
 VLIB_INIT_FUNCTION (ip4_reass_init_function);
+#endif /* CLIB_MARCH_VARIANT */
 
 static uword
 ip4_reass_walk_expired (vlib_main_t * vm,
@@ -1312,8 +1507,8 @@ ip4_reass_walk_expired (vlib_main_t * vm,
          vec_foreach (i, pool_indexes_to_free)
          {
            ip4_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
-            ip4_reass_on_timeout (vm, rm, reass);
-            ip4_reass_free (rm, rt, reass);
+            ip4_reass_drop_all (vm, node, rm, reass);
+            ip4_reass_free (vm, rm, rt, reass);
          }
          /* *INDENT-ON* */
@@ -1331,7 +1526,7 @@ ip4_reass_walk_expired (vlib_main_t * vm,
 }
 
 /* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_reass_expire_node, static) = {
+VLIB_REGISTER_NODE (ip4_reass_expire_node) = {
    .function = ip4_reass_walk_expired,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "ip4-reassembly-expire-walk",
@@ -1440,6 +1635,7 @@ VLIB_CLI_COMMAND (show_ip4_reassembly_cmd, static) = {
 };
 /* *INDENT-ON* */
 
+#ifndef CLIB_MARCH_VARIANT
 vnet_api_error_t
 ip4_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
 {
@@ -1447,6 +1643,7 @@ ip4_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
				       "ip4-reassembly-feature", sw_if_index,
				       enable_disable, 0, 0);
 }
+#endif /* CLIB_MARCH_VARIANT */
 
 #define foreach_ip4_reassembly_handoff_error \