X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fsnat%2Fin2out.c;h=9b09902abe1d9c155effbb91e7b93c4fa9adcb80;hb=57b5860f013953ce161d05302e05370db9cd6ee2;hp=5970588b5b979e15814d3f3817552ad8ee58ef27;hpb=066f034b903bda6e938bec1b12f01edef65ee9c4;p=vpp.git diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index 5970588b5b9..9b09902abe1 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -94,7 +94,7 @@ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ _(IN2OUT_PACKETS, "Good in2out packets processed") \ _(OUT_OF_PORTS, "Out of ports") \ _(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \ -_(BAD_ICMP_TYPE, "icmp type not echo-request") \ +_(BAD_ICMP_TYPE, "unsupported ICMP type") \ _(NO_TRANSLATION, "No translation") typedef enum { @@ -113,8 +113,8 @@ static char * snat_in2out_error_strings[] = { typedef enum { SNAT_IN2OUT_NEXT_LOOKUP, SNAT_IN2OUT_NEXT_DROP, - SNAT_IN2OUT_NEXT_SLOW_PATH, SNAT_IN2OUT_NEXT_ICMP_ERROR, + SNAT_IN2OUT_NEXT_SLOW_PATH, SNAT_IN2OUT_N_NEXT, } snat_in2out_next_t; @@ -134,13 +134,10 @@ typedef enum { * @returns 0 if packet should be translated otherwise 1 */ static inline int -snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0, - ip4_header_t * ip0, u32 proto0, u32 rx_fib_index0) +snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node, + u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, + u32 rx_fib_index0) { - ip4_address_t * first_int_addr; - udp_header_t * udp0 = ip4_next_header (ip0); - snat_session_key_t key0, sm0; - clib_bihash_kv_8_8_t kv0, value0; fib_node_index_t fei = FIB_NODE_INDEX_INVALID; fib_prefix_t pfx = { .fp_proto = FIB_PROTOCOL_IP4, @@ -150,39 +147,11 @@ snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0, }, }; - if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) - { - first_int_addr = - ip4_interface_first_address (sm->ip4_main, sw_if_index0, - 0 /* just want the address */); - rt->cached_sw_if_index = sw_if_index0; - if (first_int_addr) - rt->cached_ip4_address = first_int_addr->as_u32; - else - rt->cached_ip4_address = 0; - } - /* Don't NAT packet aimed at the intfc address */ - if (PREDICT_FALSE(ip0->dst_address.as_u32 == rt->cached_ip4_address)) + if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0, + ip0->dst_address.as_u32))) return 1; - key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; - key0.protocol = proto0; - key0.fib_index = sm->outside_fib_index; - kv0.key = key0.as_u64; - - /* NAT packet aimed at external address if */ - /* has active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) - { - /* or is static mappings */ - if (!snat_static_mapping_match(sm, key0, &sm0, 1)) - return 0; - } - else - return 0; - fei = fib_table_lookup (rx_fib_index0, &pfx); if (FIB_NODE_INDEX_INVALID != fei) { @@ -205,6 +174,36 @@ snat_not_translate (snat_main_t * sm, snat_runtime_t * rt, u32 sw_if_index0, return 1; } +static inline int +snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node, + u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, + u32 rx_fib_index0) +{ + udp_header_t * udp0 = ip4_next_header (ip0); + snat_session_key_t key0, sm0; + clib_bihash_kv_8_8_t kv0, value0; + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = sm->outside_fib_index; + kv0.key = key0.as_u64; + + /* NAT packet aimed at external address if */ + /* has active sessions */ + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* or is static mappings */ + if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + return 0; + } + else + return 0; + + return snat_not_translate_fast(sm, node, sw_if_index0, ip0, proto0, + rx_fib_index0); +} + static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t * ip0, u32 rx_fib_index0, @@ -212,7 +211,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, snat_session_t ** sessionp, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index) + u32 thread_index) { snat_user_t *u; snat_user_key_t user_key; @@ -246,27 +245,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->per_thread_data[cpu_index].users, u); + pool_get (sm->per_thread_data[thread_index].users, u); memset (u, 0, sizeof (*u)); u->addr = ip0->src_address; u->fib_index = rx_fib_index0; - pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt); u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); - kv0.value = u - sm->per_thread_data[cpu_index].users; + kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); } else { - u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, value0.value); } @@ -276,46 +275,71 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Remove the oldest dynamic translation */ do { oldest_per_user_translation_list_index = - clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); ASSERT (oldest_per_user_translation_list_index != ~0); /* add it back to the end of the LRU list */ - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index, oldest_per_user_translation_list_index); /* Get the list element */ oldest_per_user_translation_list_elt = - pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool, + pool_elt_at_index (sm->per_thread_data[thread_index].list_pool, oldest_per_user_translation_list_index); /* Get the session index from the list element */ session_index = oldest_per_user_translation_list_elt->value; /* Get the session */ - s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, session_index); } while (snat_is_session_static (s)); - /* Remove in2out, out2in keys */ - kv0.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) - clib_warning ("in2out key delete failed"); - kv0.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) - clib_warning ("out2in key delete failed"); - - /* log NAT event */ - snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->in2out.protocol, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - snat_free_outside_address_and_port - (sm, &s->out2in, s->outside_address_index); + if (snat_is_unk_proto_session (s)) + { + clib_bihash_kv_16_8_t up_kv; + snat_unk_proto_ses_key_t key; + + /* Remove from lookup tables */ + key.l_addr = s->in2out.addr; + key.r_addr = s->ext_host_addr; + key.fib_index = s->in2out.fib_index; + key.proto = s->in2out.port; + up_kv.key[0] = key.as_u64[0]; + up_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0)) + clib_warning ("in2out key del failed"); + + key.l_addr = s->out2in.addr; + key.fib_index = s->out2in.fib_index; + up_kv.key[0] = key.as_u64[0]; + up_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0)) + clib_warning ("out2in key del failed"); + } + else + { + /* Remove in2out, out2in keys */ + kv0.key = s->in2out.as_u64; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) + clib_warning ("in2out key delete failed"); + kv0.key = s->out2in.as_u64; + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) + clib_warning ("out2in key delete failed"); + + /* log NAT event */ + snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->in2out.protocol, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + + snat_free_outside_address_and_port + (sm, &s->out2in, s->outside_address_index); + } s->outside_address_index = ~0; if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1, @@ -333,7 +357,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, u8 static_mapping = 1; /* First try to match static mapping by local address and port */ - if (snat_static_mapping_match (sm, *key0, &key1, 0)) + if (snat_static_mapping_match (sm, *key0, &key1, 0, 0)) { static_mapping = 0; /* Try to create dynamic translation */ @@ -346,7 +370,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create a new session */ - pool_get (sm->per_thread_data[cpu_index].sessions, s); + pool_get (sm->per_thread_data[thread_index].sessions, s); memset (s, 0, sizeof (*s)); s->outside_address_index = address_index; @@ -362,38 +386,39 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create list elts */ - pool_get (sm->per_thread_data[cpu_index].list_pool, + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt); - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); per_user_translation_list_elt->value = - s - sm->per_thread_data[cpu_index].sessions; + s - sm->per_thread_data[thread_index].sessions; s->per_user_index = per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s->per_user_list_head_index, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); } s->in2out = *key0; s->out2in = key1; s->out2in.protocol = key0->protocol; s->out2in.fib_index = outside_fib_index; + s->ext_host_addr.as_u32 = ip0->dst_address.as_u32; *sessionp = s; /* Add to translation hashes */ kv0.key = s->in2out.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) clib_warning ("out2in key add failed"); @@ -403,7 +428,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, worker_by_out_key.port = s->out2in.port; worker_by_out_key.fib_index = s->out2in.fib_index; kv0.key = worker_by_out_key.as_u64; - kv0.value = cpu_index; + kv0.value = thread_index; clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); /* log NAT event */ @@ -415,62 +440,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, s->in2out.fib_index); return next0; } - -typedef struct { - u16 src_port, dst_port; -} tcp_udp_header_t; -static inline u32 icmp_in2out_slow_path (snat_main_t *sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, - icmp46_header_t * icmp0, - u32 sw_if_index0, - u32 rx_fib_index0, - vlib_node_runtime_t * node, - u32 next0, - f64 now, - u32 cpu_index, - snat_session_t ** p_s0) +static_always_inline +snat_in2out_error_t icmp_get_key(ip4_header_t *ip0, + snat_session_key_t *p_key0) { + icmp46_header_t *icmp0; snat_session_key_t key0; icmp_echo_header_t *echo0, *inner_echo0 = 0; ip4_header_t *inner_ip0 = 0; void *l4_header = 0; icmp46_header_t *inner_icmp0; - clib_bihash_kv_8_8_t kv0, value0; - snat_session_t * s0 = 0; - u32 new_addr0, old_addr0; - u16 old_id0, new_id0; - ip_csum_t sum0; - u16 checksum0; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; - u8 is_error_message = 0; + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *)(icmp0+1); - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - switch(icmp0->type) - { - case ICMP4_destination_unreachable: - case ICMP4_time_exceeded: - case ICMP4_parameter_problem: - case ICMP4_source_quench: - case ICMP4_redirect: - case ICMP4_alternate_host_address: - is_error_message = 1; - } - - if (!is_error_message) + if (!icmp_is_error_message (icmp0)) { - if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) - { - b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; - next0 = SNAT_IN2OUT_NEXT_DROP; - goto out; - } key0.protocol = SNAT_PROTOCOL_ICMP; + key0.addr = ip0->src_address; key0.port = echo0->identifier; } else @@ -478,6 +466,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, inner_ip0 = (ip4_header_t *)(echo0+1); l4_header = ip4_next_header (inner_ip0); key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol); + key0.addr = inner_ip0->dst_address; switch (key0.protocol) { case SNAT_PROTOCOL_ICMP: @@ -490,35 +479,219 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, key0.port = ((tcp_udp_header_t*)l4_header)->dst_port; break; default: - b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; - next0 = SNAT_IN2OUT_NEXT_DROP; - goto out; + return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL; } } + *p_key0 = key0; + return -1; /* success */ +} + +/** + * Get address and port values to be used for packet SNAT translation + * and create session if needed + * + * @param[in,out] sm SNAT main + * @param[in,out] node SNAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_t *s0 = 0; + u8 dont_translate = 0; + clib_bihash_kv_8_8_t kv0, value0; + u32 next0 = ~0; + int err; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (ip0, &key0); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + key0.fib_index = rx_fib_index0; kv0.key = key0.as_u64; - + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, + if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, IP_PROTOCOL_ICMP, rx_fib_index0))) - goto out; + { + dont_translate = 1; + goto out; + } - if (is_error_message) + if (PREDICT_FALSE(icmp_is_error_message (icmp0))) { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; next0 = SNAT_IN2OUT_NEXT_DROP; goto out; } next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); - + &s0, node, next0, thread_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto out; } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, - value0.value); + { + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request && + icmp0->type != ICMP4_echo_reply && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + } + +out: + *p_proto = key0.protocol; + if (s0) + *p_value = s0->out2in; + *p_dont_translate = dont_translate; + if (d) + *(snat_session_t**)d = s0; + return next0; +} + +/** + * Get address and port values to be used for packet SNAT translation + * + * @param[in] sm SNAT main + * @param[in,out] node SNAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_key_t sm0; + u8 dont_translate = 0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (ip0, &key0); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out2; + } + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only)) + { + if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0, + IP_PROTOCOL_ICMP, rx_fib_index0))) + { + dont_translate = 1; + goto out; + } + + if (icmp_is_error_message (icmp0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request && + (icmp0->type != ICMP4_echo_reply || !is_addr_only) && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + +out: + *p_value = sm0; +out2: + *p_proto = key0.protocol; + *p_dont_translate = dont_translate; + return next0; +} + +static inline u32 icmp_in2out (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 thread_index, + void *d, + void *e) +{ + snat_session_key_t sm0; + u8 protocol; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + u8 dont_translate; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + u16 checksum0; + u32 next0_tmp; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, + &protocol, &sm0, &dont_translate, d, e); + if (next0_tmp != ~0) + next0 = next0_tmp; + if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate) + goto out; sum0 = ip_incremental_checksum (0, icmp0, ntohs(ip0->length) - ip4_header_bytes (ip0)); @@ -530,28 +703,34 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } old_addr0 = ip0->src_address.as_u32; - ip0->src_address = s0->out2in.addr; - new_addr0 = ip0->src_address.as_u32; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); - if (!is_error_message) + if (!icmp_is_error_message (icmp0)) { - old_id0 = echo0->identifier; - new_id0 = s0->out2in.port; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); + new_id0 = sm0.port; + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = sm0.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } } else { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + if (!ip4_header_checksum_is_valid (inner_ip0)) { next0 = SNAT_IN2OUT_NEXT_DROP; @@ -559,19 +738,22 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } old_addr0 = inner_ip0->dst_address.as_u32; - inner_ip0->dst_address = s0->out2in.addr; - new_addr0 = inner_ip0->src_address.as_u32; + inner_ip0->dst_address = sm0.addr; + new_addr0 = inner_ip0->dst_address.as_u32; sum0 = icmp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */); icmp0->checksum = ip_csum_fold (sum0); - switch (key0.protocol) + switch (protocol) { case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + old_id0 = inner_echo0->identifier; - new_id0 = s0->out2in.port; + new_id0 = sm0.port; inner_echo0->identifier = new_id0; sum0 = icmp0->checksum; @@ -582,7 +764,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, case SNAT_PROTOCOL_UDP: case SNAT_PROTOCOL_TCP: old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port; - new_id0 = s0->out2in.port; + new_id0 = sm0.port; ((tcp_udp_header_t*)l4_header)->dst_port = new_id0; sum0 = icmp0->checksum; @@ -595,22 +777,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } } - /* Accounting */ - s0->last_heard = now; - s0->total_pkts++; - s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); - /* Per-user LRU list maintenance for dynamic translations */ - if (!snat_is_session_static (s0)) - { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, - s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, - s0->per_user_list_head_index, - s0->per_user_index); - } - out: - *p_s0 = s0; return next0; } @@ -654,7 +821,7 @@ snat_hairpinning (snat_main_t *sm, if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) { /* or static mappings */ - if (!snat_static_mapping_match(sm, key0, &sm0, 1)) + if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) { new_dst_addr0 = sm0.addr.as_u32; new_dst_port0 = sm0.port; @@ -694,26 +861,438 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->dst; - if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) - { - if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + old_dst_port0 = tcp0->dst; + if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + tcp0->dst = new_dst_port0; + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0, + ip4_header_t /* cheat */, length); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + udp0->dst_port = new_dst_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + tcp0->checksum = ip_csum_fold(sum0); + } + } + } +} + +static inline u32 icmp_in2out_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + f64 now, + u32 thread_index, + snat_session_t ** p_s0) +{ + snat_session_key_t key0, sm0; + clib_bihash_kv_8_8_t kv0, value0; + snat_worker_key_t k0; + u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0; + ip_csum_t sum0; + + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, thread_index, p_s0, 0); + snat_session_t * s0 = *p_s0; + if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0)) + { + /* Hairpinning */ + if (!icmp_is_error_message (icmp0)) + { + icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1); + u16 icmp_id0 = echo0->identifier; + key0.addr = ip0->dst_address; + key0.port = icmp_id0; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = sm->outside_fib_index; + kv0.key = key0.as_u64; + + /* Check if destination is in active sessions */ + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* or static mappings */ + if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + { + new_dst_addr0 = sm0.addr.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + } + } + else + { + si = value0.value; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = icmp_id0; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + ASSERT(0); + else + ti = value0.value; + } + else + ti = sm->num_workers; + + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + echo0->identifier = s0->in2out.port; + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, + icmp_echo_header_t, identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + + /* Destination is behind the same NAT, use internal address and port */ + if (new_dst_addr0) + { + old_dst_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_dst_addr0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + ip0->checksum = ip_csum_fold (sum0); + } + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translations */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + } + return next0; +} + +static void +snat_in2out_unknown_proto (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + clib_bihash_kv_8_8_t kv, value; + clib_bihash_kv_16_8_t s_kv, s_value; + snat_static_mapping_t *m; + snat_session_key_t m_key; + u32 old_addr, new_addr = 0; + ip_csum_t sum; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt, *oldest; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 elt_index, head_index, ses_index, oldest_index; + snat_session_t * s; + snat_unk_proto_ses_key_t key; + u32 address_index = ~0; + int i; + u8 is_sm = 0; + + old_addr = ip->src_address.as_u32; + + key.l_addr = ip->src_address; + key.r_addr = ip->dst_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + } + else + { + u_key.addr = ip->src_address; + u_key.fib_index = rx_fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip->src_address; + u->fib_index = rx_fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + m_key.addr = ip->src_address; + m_key.port = 0; + m_key.protocol = 0; + m_key.fib_index = rx_fib_index; + kv.key = m_key.as_u64; + + /* Try to find static mapping first */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + new_addr = ip->src_address.as_u32 = m->external_addr.as_u32; + is_sm = 1; + goto create_ses; + } + /* Fallback to 3-tuple key */ + else + { + /* Choose same out address as for TCP/UDP session to same destination */ + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (tsm->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + while (ses_index != ~0) + { + s = pool_elt_at_index (tsm->sessions, ses_index); + elt_index = elt->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + + if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) + { + new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + address_index = s->outside_address_index; + + key.fib_index = sm->outside_fib_index; + key.l_addr.as_u32 = new_addr; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + break; + + goto create_ses; + } + } + } + key.fib_index = sm->outside_fib_index; + for (i = 0; i < vec_len (sm->addresses); i++) + { + key.l_addr.as_u32 = sm->addresses[i].addr.as_u32; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + { + new_addr = ip->src_address.as_u32 = key.l_addr.as_u32; + address_index = i; + goto create_ses; + } + } + return; + } + +create_ses: + /* Over quota? Recycle the least recently used dynamic translation */ + if (u->nsessions >= sm->max_translations_per_user && !is_sm) + { + /* Remove the oldest dynamic translation */ + do { + oldest_index = clib_dlist_remove_head ( + tsm->list_pool, u->sessions_per_user_list_head_index); + + ASSERT (oldest_index != ~0); + + /* add it back to the end of the LRU list */ + clib_dlist_addtail (tsm->list_pool, + u->sessions_per_user_list_head_index, + oldest_index); + /* Get the list element */ + oldest = pool_elt_at_index (tsm->list_pool, oldest_index); + + /* Get the session index from the list element */ + ses_index = oldest->value; + + /* Get the session */ + s = pool_elt_at_index (tsm->sessions, ses_index); + } while (snat_is_session_static (s)); + + if (snat_is_unk_proto_session (s)) { - tcp0->dst = new_dst_port0; - sum0 = tcp0->checksum; - sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, - ip4_header_t, dst_address); - sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0, - ip4_header_t /* cheat */, length); - tcp0->checksum = ip_csum_fold(sum0); + /* Remove from lookup tables */ + key.l_addr = s->in2out.addr; + key.r_addr = s->ext_host_addr; + key.fib_index = s->in2out.fib_index; + key.proto = s->in2out.port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0)) + clib_warning ("in2out key del failed"); + + key.l_addr = s->out2in.addr; + key.fib_index = s->out2in.fib_index; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0)) + clib_warning ("out2in key del failed"); } else { - udp0->dst_port = new_dst_port0; - udp0->checksum = 0; + /* log NAT event */ + snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->in2out.protocol, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + + snat_free_outside_address_and_port (sm, &s->out2in, + s->outside_address_index); + + /* Remove in2out, out2in keys */ + kv.key = s->in2out.as_u64; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0)) + clib_warning ("in2out key del failed"); + kv.key = s->out2in.as_u64; + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0)) + clib_warning ("out2in key del failed"); } } + else + { + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + } + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->outside_address_index = address_index; + s->out2in.addr.as_u32 = new_addr; + s->out2in.fib_index = sm->outside_fib_index; + s->in2out.addr.as_u32 = old_addr; + s->in2out.fib_index = rx_fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + if (is_sm) + { + u->nstaticsessions++; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + } + else + { + u->nsessions++; + } + + /* Add to lookup tables */ + key.l_addr.as_u32 = old_addr; + key.r_addr = ip->dst_address; + key.proto = ip->protocol; + key.fib_index = rx_fib_index; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + clib_warning ("in2out key add failed"); + + key.l_addr.as_u32 = new_addr; + key.fib_index = sm->outside_fib_index; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + clib_warning ("out2in key add failed"); + } + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip->checksum = ip_csum_fold (sum); + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); + /* Per-user LRU list maintenance */ + clib_dlist_remove (tsm->list_pool, s->per_user_index); + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Hairpinning */ + old_addr = ip->dst_address.as_u32; + key.l_addr.as_u32 = ip->dst_address.as_u32; + key.r_addr.as_u32 = new_addr; + key.fib_index = sm->outside_fib_index; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + { + m_key.addr = ip->dst_address; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + return; + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + vnet_buffer(b)->sw_if_index[VLIB_TX] = m->fib_index; + new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; + } + else + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; } + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip->checksum = ip_csum_fold (sum); } static inline uword @@ -725,10 +1304,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; f64 now = vlib_time_now (vm); u32 stats_node_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : snat_in2out_node.index; @@ -799,8 +1377,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP; - proto0 = ip_proto_to_snat_proto (ip0->protocol); - if (PREDICT_FALSE(ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; @@ -811,17 +1387,23 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; } + proto0 = ip_proto_to_snat_proto (ip0->protocol); + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto0 == ~0)) - goto trace00; - + { + snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm); + goto trace00; + } + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, - node, next0, now, cpu_index, &s0); + node, next0, now, thread_index, &s0); goto trace00; } } @@ -845,12 +1427,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, + if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, proto0, rx_fib_index0))) goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace00; } @@ -861,7 +1443,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -907,9 +1489,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -925,7 +1507,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -939,29 +1521,33 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index1); - proto1 = ip_proto_to_snat_proto (ip1->protocol); - - if (PREDICT_FALSE(ip0->ttl == 1)) + if (PREDICT_FALSE(ip1->ttl == 1)) { vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); - next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; goto trace01; } + proto1 = ip_proto_to_snat_proto (ip1->protocol); + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto1 == ~0)) - goto trace01; - + { + snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, + thread_index, now, vm); + goto trace01; + } + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) { next1 = icmp_in2out_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, cpu_index, &s1); + next1, now, thread_index, &s1); goto trace01; } } @@ -985,12 +1571,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index1, ip1, + if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1, proto1, rx_fib_index1))) goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, - &s1, node, next1, cpu_index); + &s1, node, next1, thread_index); if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) goto trace01; } @@ -1001,7 +1587,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value1.value); old_addr1 = ip1->src_address.as_u32; @@ -1047,9 +1633,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s1)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s1->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s1->per_user_list_head_index, s1->per_user_index); } @@ -1064,7 +1650,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next1; t->session_index = ~0; if (s1) - t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s1 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; @@ -1114,8 +1700,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); - proto0 = ip_proto_to_snat_proto (ip0->protocol); - if (PREDICT_FALSE(ip0->ttl == 1)) { vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; @@ -1126,17 +1710,23 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; } + proto0 = ip_proto_to_snat_proto (ip0->protocol); + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { if (PREDICT_FALSE (proto0 == ~0)) - goto trace0; - + { + snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm); + goto trace0; + } + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index, &s0); + next0, now, thread_index, &s0); goto trace0; } } @@ -1160,12 +1750,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, + if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, proto0, rx_fib_index0))) goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; @@ -1177,7 +1767,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -1223,9 +1813,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -1241,7 +1831,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -1340,6 +1930,7 @@ snat_det_in2out_node_fn (vlib_main_t * vm, u32 pkts_processed = 0; snat_main_t * sm = &snat_main; u32 now = (u32) vlib_time_now (vm); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1361,14 +1952,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm, ip4_header_t * ip0, * ip1; ip_csum_t sum0, sum1; ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; - u16 old_port0, new_port0, lo_port0, i; - u16 old_port1, new_port1, lo_port1; + u16 old_port0, new_port0, lo_port0, i0; + u16 old_port1, new_port1, lo_port1, i1; udp_header_t * udp0, * udp1; tcp_header_t * tcp0, * tcp1; u32 proto0, proto1; snat_det_out_key_t key0, key1; snat_det_map_t * dm0, * dm1; snat_det_session_t * ses0 = 0, * ses1 = 0; + u32 rx_fib_index0, rx_fib_index1; + icmp46_header_t * icmp0, * icmp1; /* Prefetch next iteration. */ { @@ -1404,25 +1997,51 @@ snat_det_in2out_node_fn (vlib_main_t * vm, sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + icmp0 = (icmp46_header_t *) udp0; + + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, thread_index, + &ses0, &dm0); + goto trace0; + } + dm0 = snat_det_map_by_user(sm, &ip0->src_address); if (PREDICT_FALSE(!dm0)) { clib_warning("no match for internal host %U", format_ip4_address, &ip0->src_address); + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; goto trace0; } snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); - ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0); if (PREDICT_FALSE(!ses0)) { - key0.ext_host_addr = ip0->dst_address; - key0.ext_host_port = tcp0->dst; - for (i = 0; i < dm0->ports_per_host; i++) + for (i0 = 0; i0 < dm0->ports_per_host; i0++) { - key0.out_port = clib_host_to_net_u16 (lo_port0 + i + - (clib_net_to_host_u16 (tcp0->src) % dm0->ports_per_host)); + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) continue; @@ -1430,15 +2049,20 @@ snat_det_in2out_node_fn (vlib_main_t * vm, ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); break; } - if (PREDICT_FALSE(!ses0)) - { - next0 = SNAT_IN2OUT_NEXT_DROP; - goto trace0; - } + if (PREDICT_FALSE(!ses0)) + { + /* too many sessions for user, send ICMP error packet */ + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_destination_unreachable_host, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } } new_port0 = ses0->out.out_port; - proto0 = ip_proto_to_snat_proto (ip0->protocol); old_addr0.as_u32 = ip0->src_address.as_u32; ip0->src_address.as_u32 = new_addr0.as_u32; @@ -1488,16 +2112,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm, switch(ses0->state) { case SNAT_SESSION_UDP_ACTIVE: - ses0->expire = now + SNAT_UDP_TIMEOUT; + ses0->expire = now + sm->udp_timeout; break; case SNAT_SESSION_TCP_SYN_SENT: case SNAT_SESSION_TCP_FIN_WAIT: case SNAT_SESSION_TCP_CLOSE_WAIT: case SNAT_SESSION_TCP_LAST_ACK: - ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + ses0->expire = now + sm->tcp_transitory_timeout; break; case SNAT_SESSION_TCP_ESTABLISHED: - ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + ses0->expire = now + sm->tcp_established_timeout; break; } @@ -1523,26 +2147,51 @@ snat_det_in2out_node_fn (vlib_main_t * vm, sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + if (PREDICT_FALSE(ip1->ttl == 1)) + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace1; + } + + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1); + icmp1 = (icmp46_header_t *) udp1; + + next1 = icmp_in2out(sm, b1, ip1, icmp1, sw_if_index1, + rx_fib_index1, node, next1, thread_index, + &ses1, &dm1); + goto trace1; + } + dm1 = snat_det_map_by_user(sm, &ip1->src_address); if (PREDICT_FALSE(!dm1)) { clib_warning("no match for internal host %U", format_ip4_address, &ip0->src_address); + next1 = SNAT_IN2OUT_NEXT_DROP; + b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; goto trace1; } snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1); + key1.ext_host_addr = ip1->dst_address; + key1.ext_host_port = tcp1->dst; - ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src); + ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1); if (PREDICT_FALSE(!ses1)) { - key1.ext_host_addr = ip1->dst_address; - key1.ext_host_port = tcp1->dst; - for (i = 0; i < dm1->ports_per_host; i++) + for (i1 = 0; i1 < dm1->ports_per_host; i1++) { - key1.out_port = clib_host_to_net_u16 (lo_port1 + i + - (clib_net_to_host_u16 (tcp1->src) % dm1->ports_per_host)); + key1.out_port = clib_host_to_net_u16 (lo_port1 + + ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host)); if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64)) continue; @@ -1550,15 +2199,20 @@ snat_det_in2out_node_fn (vlib_main_t * vm, ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1); break; } - if (PREDICT_FALSE(!ses1)) - { - next1 = SNAT_IN2OUT_NEXT_DROP; - goto trace1; - } + if (PREDICT_FALSE(!ses1)) + { + /* too many sessions for user, send ICMP error packet */ + + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_destination_unreachable_host, + 0); + next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace1; + } } new_port1 = ses1->out.out_port; - proto1 = ip_proto_to_snat_proto (ip1->protocol); old_addr1.as_u32 = ip1->src_address.as_u32; ip1->src_address.as_u32 = new_addr1.as_u32; @@ -1608,16 +2262,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm, switch(ses1->state) { case SNAT_SESSION_UDP_ACTIVE: - ses1->expire = now + SNAT_UDP_TIMEOUT; + ses1->expire = now + sm->udp_timeout; break; case SNAT_SESSION_TCP_SYN_SENT: case SNAT_SESSION_TCP_FIN_WAIT: case SNAT_SESSION_TCP_CLOSE_WAIT: case SNAT_SESSION_TCP_LAST_ACK: - ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + ses1->expire = now + sm->tcp_transitory_timeout; break; case SNAT_SESSION_TCP_ESTABLISHED: - ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + ses1->expire = now + sm->tcp_established_timeout; break; } @@ -1652,13 +2306,15 @@ snat_det_in2out_node_fn (vlib_main_t * vm, ip4_header_t * ip0; ip_csum_t sum0; ip4_address_t new_addr0, old_addr0; - u16 old_port0, new_port0, lo_port0, i; + u16 old_port0, new_port0, lo_port0, i0; udp_header_t * udp0; tcp_header_t * tcp0; u32 proto0; snat_det_out_key_t key0; snat_det_map_t * dm0; snat_det_session_t * ses0 = 0; + u32 rx_fib_index0; + icmp46_header_t * icmp0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -1677,25 +2333,51 @@ snat_det_in2out_node_fn (vlib_main_t * vm, sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace00; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + icmp0 = (icmp46_header_t *) udp0; + + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, thread_index, + &ses0, &dm0); + goto trace00; + } + dm0 = snat_det_map_by_user(sm, &ip0->src_address); if (PREDICT_FALSE(!dm0)) { clib_warning("no match for internal host %U", format_ip4_address, &ip0->src_address); + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; goto trace00; } snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); - ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0); if (PREDICT_FALSE(!ses0)) { - key0.ext_host_addr = ip0->dst_address; - key0.ext_host_port = tcp0->dst; - for (i = 0; i < dm0->ports_per_host; i++) + for (i0 = 0; i0 < dm0->ports_per_host; i0++) { - key0.out_port = clib_host_to_net_u16 (lo_port0 + i + - (clib_net_to_host_u16 (tcp0->src) % dm0->ports_per_host)); + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) continue; @@ -1703,15 +2385,20 @@ snat_det_in2out_node_fn (vlib_main_t * vm, ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); break; } - if (PREDICT_FALSE(!ses0)) - { - next0 = SNAT_IN2OUT_NEXT_DROP; - goto trace00; - } + if (PREDICT_FALSE(!ses0)) + { + /* too many sessions for user, send ICMP error packet */ + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_destination_unreachable_host, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace00; + } } new_port0 = ses0->out.out_port; - proto0 = ip_proto_to_snat_proto (ip0->protocol); old_addr0.as_u32 = ip0->src_address.as_u32; ip0->src_address.as_u32 = new_addr0.as_u32; @@ -1761,16 +2448,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm, switch(ses0->state) { case SNAT_SESSION_UDP_ACTIVE: - ses0->expire = now + SNAT_UDP_TIMEOUT; + ses0->expire = now + sm->udp_timeout; break; case SNAT_SESSION_TCP_SYN_SENT: case SNAT_SESSION_TCP_FIN_WAIT: case SNAT_SESSION_TCP_CLOSE_WAIT: case SNAT_SESSION_TCP_LAST_ACK: - ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + ses0->expire = now + sm->tcp_transitory_timeout; break; case SNAT_SESSION_TCP_ESTABLISHED: - ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + ses0->expire = now + sm->tcp_established_timeout; break; } @@ -1817,17 +2504,176 @@ VLIB_REGISTER_NODE (snat_det_in2out_node) = { .runtime_data_bytes = sizeof (snat_runtime_t), - .n_next_nodes = 2, + .n_next_nodes = 3, /* edit / add dispositions here */ .next_nodes = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn); +/** + * Get address and port values to be used for packet SNAT translation + * and create session if needed + * + * @param[in,out] sm SNAT main + * @param[in,out] node SNAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + u8 protocol; + snat_det_out_key_t key0; + u8 dont_translate = 0; + u32 next0 = ~0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + snat_det_map_t * dm0 = 0; + ip4_address_t new_addr0; + u16 lo_port0, i0; + snat_det_session_t * ses0 = 0; + ip4_address_t in_addr; + u16 in_port; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *)(icmp0+1); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + if (!icmp_is_error_message (icmp0)) + { + protocol = SNAT_PROTOCOL_ICMP; + in_addr = ip0->src_address; + in_port = echo0->identifier; + } + else + { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + protocol = ip_proto_to_snat_proto (inner_ip0->protocol); + in_addr = inner_ip0->dst_address; + switch (protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + in_port = inner_echo0->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + in_port = ((tcp_udp_header_t*)l4_header)->dst_port; + break; + default: + b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + } + + dm0 = snat_det_map_by_user(sm, &in_addr); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &in_addr); + if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0, + IP_PROTOCOL_ICMP, rx_fib_index0))) + { + dont_translate = 1; + goto out; + } + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto out; + } + + snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0); + + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = 0; + + ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0); + if (PREDICT_FALSE(!ses0)) + { + if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0, + IP_PROTOCOL_ICMP, rx_fib_index0))) + { + dont_translate = 1; + goto out; + } + if (icmp0->type != ICMP4_echo_request) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + for (i0 = 0; i0 < dm0->ports_per_host; i0++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (echo0->identifier)) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &in_addr, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &in_addr, echo0->identifier, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto out; + } + } + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + u32 now = (u32) vlib_time_now (sm->vlib_main); + + ses0->state = SNAT_SESSION_ICMP_ACTIVE; + ses0->expire = now + sm->icmp_timeout; + +out: + *p_proto = protocol; + if (ses0) + { + p_value->addr = new_addr0; + p_value->fib_index = sm->outside_fib_index; + p_value->port = ses0->out.out_port; + } + *p_dont_translate = dont_translate; + if (d) + *(snat_det_session_t**)d = ses0; + if (e) + *(snat_det_map_t**)e = dm0; + return next0; +} + /**********************/ /*** worker handoff ***/ /**********************/ @@ -1848,7 +2694,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 n_left_to_next_worker = 0, *to_next_worker = 0; u32 next_worker_index = 0; u32 current_worker_index = ~0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ASSERT (vec_len (sm->workers)); @@ -1886,7 +2732,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0); - if (PREDICT_FALSE (next_worker_index != cpu_index)) + if (PREDICT_FALSE (next_worker_index != thread_index)) { do_handoff = 1; @@ -1991,67 +2837,6 @@ VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); -/********************************/ -/*** static mapping only mode ***/ -/********************************/ -static inline u32 icmp_in2out_static_map (snat_main_t *sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, - icmp46_header_t * icmp0, - u32 sw_if_index0, - vlib_node_runtime_t * node, - u32 next0, - u32 rx_fib_index0) -{ - snat_session_key_t key0, sm0; - icmp_echo_header_t *echo0; - u32 new_addr0, old_addr0; - u16 old_id0, new_id0; - ip_csum_t sum0; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; - - echo0 = (icmp_echo_header_t *)(icmp0+1); - - key0.addr = ip0->src_address; - key0.port = echo0->identifier; - key0.fib_index = rx_fib_index0; - - if (snat_static_mapping_match(sm, key0, &sm0, 0)) - { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, - IP_PROTOCOL_ICMP, rx_fib_index0))) - return next0; - - b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; - return SNAT_IN2OUT_NEXT_DROP; - } - - new_addr0 = sm0.addr.as_u32; - new_id0 = sm0.port; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; - old_addr0 = ip0->src_address.as_u32; - ip0->src_address.as_u32 = new_addr0; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - src_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE(new_id0 != echo0->identifier)) - { - old_id0 = echo0->identifier; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); - } - - return next0; -} - static uword snat_in2out_fast_static_map_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2061,7 +2846,6 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, snat_in2out_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; u32 stats_node_index; stats_node_index = snat_in2out_fast_node.index; @@ -2113,6 +2897,16 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } + proto0 = ip_proto_to_snat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == ~0)) @@ -2120,12 +2914,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, - proto0, rx_fib_index0))) - goto trace0; - - next0 = icmp_in2out_static_map - (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, ~0, 0, 0); goto trace0; } @@ -2133,7 +2923,7 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, key0.port = udp0->src_port; key0.fib_index = rx_fib_index0; - if (snat_static_mapping_match(sm, key0, &sm0, 0)) + if (snat_static_mapping_match(sm, key0, &sm0, 0, 0)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; next0= SNAT_IN2OUT_NEXT_DROP;