X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fip%2Freass%2Fip4_full_reass.c;h=fca96aff0d6d79bf944ca826601aeca72e60883a;hb=dd298e804a25dbebc9e284b3b2d0dfd9bf674ad8;hp=f0e1753cf2ddaef9f297ae74141b55b8f6e8c7a3;hpb=e0f901a0483f7b10d2409075e694d91cf4fb42c5;p=vpp.git diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c index f0e1753cf2d..fca96aff0d6 100644 --- a/src/vnet/ip/reass/ip4_full_reass.c +++ b/src/vnet/ip/reass/ip4_full_reass.c @@ -23,14 +23,19 @@ #include #include #include +#include #include #include #include #include #define MSEC_PER_SEC 1000 -#define IP4_REASS_TIMEOUT_DEFAULT_MS 100 -#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default +#define IP4_REASS_TIMEOUT_DEFAULT_MS 200 + +/* As there are only 1024 reass context per thread, either the DDOS attacks + * or fractions of real timeouts, would consume these contexts quickly and + * running out context space and unable to perform reassembly */ +#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default #define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 #define IP4_REASS_HT_LOAD_FACTOR (0.75) @@ -68,21 +73,19 @@ typedef enum typedef struct { - union + struct { - struct - { - u32 xx_id; - ip4_address_t src; - ip4_address_t dst; - u16 frag_id; - u8 proto; - u8 unused; - }; - u64 as_u64[2]; + u16 frag_id; + u8 proto; + u8 unused; + u32 fib_index; + ip4_address_t src; + ip4_address_t dst; }; } ip4_full_reass_key_t; +STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16); + typedef union { struct @@ -155,6 +158,8 @@ typedef struct ip4_full_reass_t *pool; u32 reass_n; u32 id_counter; + // for pacing the main thread timeouts + u32 last_id; clib_spinlock_t lock; } ip4_full_reass_per_thread_t; @@ -410,9 +415,8 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm, ip4_full_reass_per_thread_t * rt, ip4_full_reass_t * reass) { - clib_bihash_kv_16_8_t kv; - kv.key[0] = reass->key.as_u64[0]; - kv.key[1] = reass->key.as_u64[1]; + clib_bihash_kv_16_8_t kv = {}; + clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key)); clib_bihash_add_del_16_8 (&rm->hash, &kv, 0); return ip4_full_reass_free_ctx (rt, reass); } @@ -453,6 +457,11 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, next_index = reass->error_next_index; u32 bi = ~0; + /* record number of packets sent to custom app */ + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TO_CUSTOM_APP, + vec_len (to_free)); + while (vec_len (to_free) > 0) { vlib_get_next_frame (vm, node, next_index, *to_next, @@ -465,7 +474,7 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node, if (~0 != bi) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - if ((b->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) { ip4_full_reass_add_trace (vm, node, reass, bi, RANGE_DISCARD, 0, ~0); @@ -579,6 +588,8 @@ again: if (now > reass->last_heard + rm->timeout) { + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TIMEOUT, 1); ip4_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next); ip4_full_reass_free (rm, rt, reass); reass = NULL; @@ -607,8 +618,7 @@ again: ++rt->reass_n; } - reass->key.as_u64[0] = kv->kv.key[0]; - reass->key.as_u64[1] = kv->kv.key[1]; + clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key)); kv->v.reass_index = (reass - rt->pool); kv->v.memory_owner_thread_index = vm->thread_index; reass->last_heard = now; @@ -637,7 +647,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; - u32 buf_cnt = 0; do { u32 tmp_bi = sub_chain_bi; @@ -674,7 +683,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { - ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) @@ -825,6 +833,15 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, } vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; + /* Keep track of number of successfully reassembled packets and number of + * fragments reassembled */ + vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS, + 1); + + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED, + reass->fragments_n); + *error0 = IP4_ERROR_NONE; ip4_full_reass_free (rm, rt, reass); reass = NULL; @@ -1214,6 +1231,10 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0); const u32 fragment_last = fragment_first + fragment_length - 1; + /* Keep track of received fragments */ + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_FRAGMENTS_RCVD, 1); + if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && // 8 is minimum frag length per RFC 791 @@ -1223,16 +1244,18 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, error0 = IP4_ERROR_REASS_MALFORMED_PACKET; goto packet_enqueue; } - ip4_full_reass_kv_t kv; - u8 do_handoff = 0; - kv.k.as_u64[0] = - (u64) vec_elt (ip4_main.fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]) | - (u64) ip0->src_address.as_u32 << 32; - kv.k.as_u64[1] = - (u64) ip0->dst_address. - as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48; + u32 fib_index = vec_elt (ip4_main.fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]); + + ip4_full_reass_kv_t kv = { .k.fib_index = fib_index, + .k.src.as_u32 = ip0->src_address.as_u32, + .k.dst.as_u32 = ip0->dst_address.as_u32, + .k.frag_id = ip0->fragment_id, + .k.proto = ip0->protocol + + }; + u8 do_handoff = 0; ip4_full_reass_t *reass = ip4_full_reass_find_or_create ( vm, node, rm, rt, &kv, &do_handoff, &n_left_to_next, &to_next); @@ -1335,6 +1358,14 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_feature_next (&next0, b0); } + /* Increment the counter to-custom-app also as this fragment is + * also going to application */ + if (CUSTOM == type) + { + vlib_node_increment_counter ( + vm, node->node_index, IP4_ERROR_REASS_TO_CUSTOM_APP, 1); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -1353,12 +1384,6 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, return frame->n_vectors; } -static char *ip4_full_reass_error_strings[] = { -#define _(sym, string) string, - foreach_ip4_error -#undef _ -}; - VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -1370,8 +1395,8 @@ VLIB_REGISTER_NODE (ip4_full_reass_node) = { .name = "ip4-full-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1392,8 +1417,8 @@ VLIB_REGISTER_NODE (ip4_local_full_reass_node) = { .name = "ip4-local-full-reassembly", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1416,8 +1441,8 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = { .name = "ip4-full-reassembly-feature", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1446,8 +1471,8 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = { .name = "ip4-full-reassembly-custom", .vector_size = sizeof (u32), .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, .n_next_nodes = IP4_FULL_REASS_N_NEXT, .next_nodes = { @@ -1481,7 +1506,9 @@ ip4_full_reass_get_nbuckets () u32 nbuckets; u8 i; - nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR); + /* need more mem with more workers */ + nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) / + IP4_REASS_HT_LOAD_FACTOR); for (i = 0; i < 31; i++) if ((1 << i) >= nbuckets) @@ -1669,15 +1696,38 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, vec_reset_length (pool_indexes_to_free); - pool_foreach_index (index, rt->pool) + /* Pace the number of timeouts handled per thread,to avoid barrier + * sync issues in real world scenarios */ + + u32 beg = rt->last_id; + /* to ensure we walk at least once per sec per context */ + u32 end = + beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT * + IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC + + 1); + if (end > vec_len (rt->pool)) { - reass = pool_elt_at_index (rt->pool, index); - if (now > reass->last_heard + rm->timeout) - { - vec_add1 (pool_indexes_to_free, index); - } + end = vec_len (rt->pool); + rt->last_id = 0; + } + else + { + rt->last_id = end; } + pool_foreach_stepping_index (index, beg, end, rt->pool) + { + reass = pool_elt_at_index (rt->pool, index); + if (now > reass->last_heard + rm->timeout) + { + vec_add1 (pool_indexes_to_free, index); + } + } + + if (vec_len (pool_indexes_to_free)) + vlib_node_increment_counter (vm, node->node_index, + IP4_ERROR_REASS_TIMEOUT, + vec_len (pool_indexes_to_free)); int *i; vec_foreach (i, pool_indexes_to_free) { @@ -1701,13 +1751,12 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node, } VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = { - .function = ip4_full_reass_walk_expired, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ip4-full-reassembly-expire-walk", - .format_trace = format_ip4_full_reass_trace, - .n_errors = ARRAY_LEN (ip4_full_reass_error_strings), - .error_strings = ip4_full_reass_error_strings, - + .function = ip4_full_reass_walk_expired, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ip4-full-reassembly-expire-walk", + .format_trace = format_ip4_full_reass_trace, + .n_errors = IP4_N_ERROR, + .error_counters = ip4_error_counters, }; static u8 * @@ -1715,9 +1764,8 @@ format_ip4_full_reass_key (u8 * s, va_list * args) { ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *); s = - format (s, - "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u", - key->xx_id, format_ip4_address, &key->src, format_ip4_address, + format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u", + key->fib_index, format_ip4_address, &key->src, format_ip4_address, &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto); return s; }