X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fsnat%2Fin2out.c;h=e5ee965f7aea11c3d5643a57bfa45a1066a212ef;hb=586afd762bfa149f5ca167bd5fd5a0cd59ce94fe;hp=e30c913c02dc7de197fea18b1cee9c05f99840e5;hpb=b33f413af46ec8dff7f222dbd5bc3bcec1502d3d;p=vpp.git diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e30c913c02d..e5ee965f7ae 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,7 @@ vlib_node_registration_t snat_in2out_node; vlib_node_registration_t snat_in2out_slowpath_node; vlib_node_registration_t snat_in2out_fast_node; vlib_node_registration_t snat_in2out_worker_handoff_node; +vlib_node_registration_t snat_det_in2out_node; #define foreach_snat_in2out_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -210,7 +212,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, snat_session_t ** sessionp, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index) + u32 thread_index) { snat_user_t *u; snat_user_key_t user_key; @@ -244,26 +246,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->per_thread_data[cpu_index].users, u); + pool_get (sm->per_thread_data[thread_index].users, u); memset (u, 0, sizeof (*u)); u->addr = ip0->src_address; + u->fib_index = rx_fib_index0; - pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt); u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); - kv0.value = u - sm->per_thread_data[cpu_index].users; + kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); } else { - u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, value0.value); } @@ -273,25 +276,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Remove the oldest dynamic translation */ do { oldest_per_user_translation_list_index = - clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); ASSERT (oldest_per_user_translation_list_index != ~0); /* add it back to the end of the LRU list */ - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index, oldest_per_user_translation_list_index); /* Get the list element */ oldest_per_user_translation_list_elt = - pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool, + pool_elt_at_index (sm->per_thread_data[thread_index].list_pool, oldest_per_user_translation_list_index); /* Get the session index from the list element */ session_index = oldest_per_user_translation_list_elt->value; /* Get the session */ - s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, session_index); } while (snat_is_session_static (s)); @@ -315,7 +318,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, (sm, &s->out2in, s->outside_address_index); s->outside_address_index = ~0; - if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1, + &address_index)) { ASSERT(0); @@ -333,7 +337,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, { static_mapping = 0; /* Try to create dynamic translation */ - if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1, + &address_index)) { b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; return SNAT_IN2OUT_NEXT_DROP; @@ -341,7 +346,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create a new session */ - pool_get (sm->per_thread_data[cpu_index].sessions, s); + pool_get (sm->per_thread_data[thread_index].sessions, s); memset (s, 0, sizeof (*s)); s->outside_address_index = address_index; @@ -357,22 +362,22 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, } /* Create list elts */ - pool_get (sm->per_thread_data[cpu_index].list_pool, + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt); - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); per_user_translation_list_elt->value = - s - sm->per_thread_data[cpu_index].sessions; + s - sm->per_thread_data[thread_index].sessions; s->per_user_index = per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s->per_user_list_head_index, per_user_translation_list_elt - - sm->per_thread_data[cpu_index].list_pool); + sm->per_thread_data[thread_index].list_pool); } s->in2out = *key0; @@ -383,12 +388,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; - kv0.value = s - sm->per_thread_data[cpu_index].sessions; + kv0.value = s - sm->per_thread_data[thread_index].sessions; if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) clib_warning ("out2in key add failed"); @@ -398,7 +403,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, worker_by_out_key.port = s->out2in.port; worker_by_out_key.fib_index = s->out2in.fib_index; kv0.key = worker_by_out_key.as_u64; - kv0.value = cpu_index; + kv0.value = thread_index; clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); /* log NAT event */ @@ -410,61 +415,23 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, s->in2out.fib_index); return next0; } - -typedef struct { - u16 src_port, dst_port; -} tcp_udp_header_t; -static inline u32 icmp_in2out_slow_path (snat_main_t *sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, - icmp46_header_t * icmp0, - u32 sw_if_index0, - u32 rx_fib_index0, - vlib_node_runtime_t * node, - u32 next0, - f64 now, - u32 cpu_index, - snat_session_t ** p_s0) +static_always_inline +snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0, + snat_session_key_t *p_key0) { snat_session_key_t key0; icmp_echo_header_t *echo0, *inner_echo0 = 0; - ip4_header_t *inner_ip0; + ip4_header_t *inner_ip0 = 0; void *l4_header = 0; icmp46_header_t *inner_icmp0; - clib_bihash_kv_8_8_t kv0, value0; - snat_session_t * s0 = 0; - u32 new_addr0, old_addr0; - u16 old_id0, new_id0; - ip_csum_t sum0; - u16 checksum0; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; - u8 is_error_message = 0; echo0 = (icmp_echo_header_t *)(icmp0+1); - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - switch(icmp0->type) - { - case ICMP4_destination_unreachable: - case ICMP4_time_exceeded: - case ICMP4_parameter_problem: - case ICMP4_source_quench: - case ICMP4_redirect: - case ICMP4_alternate_host_address: - is_error_message = 1; - } - - if (!is_error_message) + if (!icmp_is_error_message (icmp0)) { if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) - { - b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; - next0 = SNAT_IN2OUT_NEXT_DROP; - goto out; - } + return SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE; key0.protocol = SNAT_PROTOCOL_ICMP; key0.port = echo0->identifier; } @@ -485,36 +452,202 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, key0.port = ((tcp_udp_header_t*)l4_header)->dst_port; break; default: - b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; - next0 = SNAT_IN2OUT_NEXT_DROP; - goto out; + return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL; } } + *p_key0 = key0; + return -1; /* success */ +} + +/** + * Get address and port values to be used for packet SNAT translation + * and create session if needed + * + * @param[in,out] sm SNAT main + * @param[in,out] node SNAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_key address and port before NAT translation + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + */ +u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, + snat_session_key_t *p_key, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d) +{ + snat_runtime_t *rt; + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_t *s0 = 0; + u8 dont_translate = 0; + clib_bihash_kv_8_8_t kv0, value0; + u32 next0 = ~0; + int err; + + rt = (snat_runtime_t *) node->runtime_data; + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (icmp0, &key0); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; kv0.key = key0.as_u64; - + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) { if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, IP_PROTOCOL_ICMP, rx_fib_index0))) - goto out; + { + dont_translate = 1; + goto out; + } - if (is_error_message) + if (icmp_is_error_message (icmp0)) { next0 = SNAT_IN2OUT_NEXT_DROP; goto out; } next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); - + &s0, node, next0, thread_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto out; } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); +out: + *p_key = key0; + if (s0) + *p_value = s0->out2in; + *p_dont_translate = dont_translate; + if (d) + *(snat_session_t**)d = s0; + return next0; +} + +/** + * Get address and port values to be used for packet SNAT translation + * + * @param[in] sm SNAT main + * @param[in,out] node SNAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_key address and port before NAT translation + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + */ +u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, + snat_session_key_t *p_key, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d) +{ + snat_runtime_t *rt; + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_key_t sm0; + u8 dont_translate = 0; + u32 next0 = ~0; + int err; + + rt = (snat_runtime_t *) node->runtime_data; + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (icmp0, &key0); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out2; + } + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0)) + { + if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, + IP_PROTOCOL_ICMP, rx_fib_index0))) + { + dont_translate = 1; + goto out; + } + + if (icmp_is_error_message (icmp0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + +out: + *p_value = sm0; +out2: + *p_key = key0; + *p_dont_translate = dont_translate; + return next0; +} + +static inline u32 icmp_in2out (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 thread_index, + void *d) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + u8 dont_translate; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + u16 checksum0; + u32 next0_tmp; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, + &key0, &sm0, &dont_translate, d); + if (next0_tmp != ~0) + next0 = next0_tmp; + if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate) + goto out; + sum0 = ip_incremental_checksum (0, icmp0, ntohs(ip0->length) - ip4_header_bytes (ip0)); checksum0 = ~ip_csum_fold (sum0); @@ -525,28 +658,34 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } old_addr0 = ip0->src_address.as_u32; - ip0->src_address = s0->out2in.addr; - new_addr0 = ip0->src_address.as_u32; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; sum0 = ip0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); - if (!is_error_message) + if (!icmp_is_error_message (icmp0)) { - old_id0 = echo0->identifier; - new_id0 = s0->out2in.port; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); + new_id0 = sm0.port; + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = sm0.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } } else { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + if (!ip4_header_checksum_is_valid (inner_ip0)) { next0 = SNAT_IN2OUT_NEXT_DROP; @@ -554,7 +693,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } old_addr0 = inner_ip0->dst_address.as_u32; - inner_ip0->dst_address = s0->out2in.addr; + inner_ip0->dst_address = sm0.addr; new_addr0 = inner_ip0->src_address.as_u32; sum0 = icmp0->checksum; @@ -565,8 +704,11 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, switch (key0.protocol) { case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + old_id0 = inner_echo0->identifier; - new_id0 = s0->out2in.port; + new_id0 = sm0.port; inner_echo0->identifier = new_id0; sum0 = icmp0->checksum; @@ -577,7 +719,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, case SNAT_PROTOCOL_UDP: case SNAT_PROTOCOL_TCP: old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port; - new_id0 = s0->out2in.port; + new_id0 = sm0.port; ((tcp_udp_header_t*)l4_header)->dst_port = new_id0; sum0 = icmp0->checksum; @@ -590,22 +732,7 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, } } - /* Accounting */ - s0->last_heard = now; - s0->total_pkts++; - s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); - /* Per-user LRU list maintenance for dynamic translations */ - if (!snat_is_session_static (s0)) - { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, - s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, - s0->per_user_list_head_index, - s0->per_user_index); - } - out: - *p_s0 = s0; return next0; } @@ -689,12 +816,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -711,6 +838,40 @@ snat_hairpinning (snat_main_t *sm, } } +static inline u32 icmp_in2out_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + f64 now, + u32 thread_index, + snat_session_t ** p_s0) +{ + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, thread_index, p_s0); + snat_session_t * s0 = *p_s0; + if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0)) + { + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translations */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + } + return next0; +} + static inline uword snat_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -723,7 +884,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; f64 now = vlib_time_now (vm); u32 stats_node_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : snat_in2out_node.index; @@ -816,7 +977,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, - node, next0, now, cpu_index, &s0); + node, next0, now, thread_index, &s0); goto trace00; } } @@ -845,7 +1006,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace00; } @@ -856,7 +1017,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -872,9 +1033,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -902,9 +1063,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -920,7 +1081,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -956,7 +1117,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next1 = icmp_in2out_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, cpu_index, &s1); + next1, now, thread_index, &s1); goto trace01; } } @@ -985,7 +1146,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, - &s1, node, next1, cpu_index); + &s1, node, next1, thread_index); if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) goto trace01; } @@ -996,7 +1157,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value1.value); old_addr1 = ip1->src_address.as_u32; @@ -1012,9 +1173,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -1042,9 +1203,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s1)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s1->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s1->per_user_list_head_index, s1->per_user_index); } @@ -1059,7 +1220,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next1; t->session_index = ~0; if (s1) - t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s1 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; @@ -1131,7 +1292,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index, &s0); + next0, now, thread_index, &s0); goto trace0; } } @@ -1160,7 +1321,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, - &s0, node, next0, cpu_index); + &s0, node, next0, thread_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; @@ -1172,7 +1333,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, value0.value); old_addr0 = ip0->src_address.as_u32; @@ -1188,9 +1349,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1218,9 +1379,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, s0->per_user_index); - clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, s0->per_user_list_head_index, s0->per_user_index); } @@ -1236,7 +1397,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; } pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; @@ -1322,6 +1483,516 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); +/**************************/ +/*** deterministic mode ***/ +/**************************/ +static uword +snat_det_in2out_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + u32 now = (u32) vlib_time_now (vm); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, lo_port0, i0; + u16 old_port1, new_port1, lo_port1, i1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + u32 proto0, proto1; + snat_det_out_key_t key0, key1; + snat_det_map_t * dm0, * dm1; + snat_det_session_t * ses0 = 0, * ses1 = 0; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace0; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i0 = 0; i0 < dm0->ports_per_host; i0++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace0; + } + } + + new_port0 = ses0->out.out_port; + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + dm1 = snat_det_map_by_user(sm, &ip1->src_address); + if (PREDICT_FALSE(!dm1)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace1; + } + + snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1); + + + ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src); + if (PREDICT_FALSE(!ses1)) + { + key1.ext_host_addr = ip1->dst_address; + key1.ext_host_port = tcp1->dst; + for (i1 = 0; i1 < dm1->ports_per_host; i1++) + { + key1.out_port = clib_host_to_net_u16 (lo_port1 + + ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host)); + + if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64)) + continue; + + ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1); + break; + } + if (PREDICT_FALSE(!ses1)) + { + next1 = SNAT_IN2OUT_NEXT_DROP; + b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace1; + } + } + + new_port1 = ses1->out.out_port; + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + old_addr1.as_u32 = ip1->src_address.as_u32; + ip1->src_address.as_u32 = new_addr1.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + src_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + if (tcp1->flags & TCP_FLAG_SYN) + ses1->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED) + ses1->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm1, ses1); + else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses1->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port1 = tcp1->src; + tcp1->src = new_port1; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + ses1->state = SNAT_SESSION_UDP_ACTIVE; + old_port1 = udp1->src_port; + udp1->src_port = new_port1; + udp1->checksum = 0; + } + + switch(ses1->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses1->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace1: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (ses1) + t->session_index = ses1 - dm1->sessions; + } + + pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + ip4_address_t new_addr0, old_addr0; + u16 old_port0, new_port0, lo_port0, i0; + udp_header_t * udp0; + tcp_header_t * tcp0; + u32 proto0; + snat_det_out_key_t key0; + snat_det_map_t * dm0; + snat_det_session_t * ses0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace00; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i0 = 0; i0 < dm0->ports_per_host; i0++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace00; + } + } + + new_port0 = ses0->out.out_port; + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace00: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_det_in2out_node.index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_det_in2out_node) = { + .function = snat_det_in2out_node_fn, + .name = "snat-det-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = 2, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn); + +/**********************/ +/*** worker handoff ***/ +/**********************/ static uword snat_in2out_worker_handoff_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1339,7 +2010,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 n_left_to_next_worker = 0, *to_next_worker = 0; u32 next_worker_index = 0; u32 current_worker_index = ~0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ASSERT (vec_len (sm->workers)); @@ -1362,8 +2033,6 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 sw_if_index0; u32 rx_fib_index0; ip4_header_t * ip0; - snat_user_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u8 do_handoff; bi0 = from[0]; @@ -1377,30 +2046,9 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - kv0.key = key0.as_u64; + next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0); - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - - /* add non-traslated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); - } - else - next_worker_index = value0.value; - - if (PREDICT_FALSE (next_worker_index != cpu_index)) + if (PREDICT_FALSE (next_worker_index != thread_index)) { do_handoff = 1; @@ -1438,7 +2086,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, /* if this is 1st frame */ if (!f) { - f = vlib_get_frame_to_node (vm, snat_in2out_node.index); + f = vlib_get_frame_to_node (vm, sm->in2out_node_index); to_next = vlib_frame_vector_args (f); } @@ -1458,7 +2106,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, } if (f) - vlib_put_frame_to_node (vm, snat_in2out_node.index, f); + vlib_put_frame_to_node (vm, sm->in2out_node_index, f); if (hf) hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; @@ -1505,64 +2153,6 @@ VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); -static inline u32 icmp_in2out_static_map (snat_main_t *sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, - icmp46_header_t * icmp0, - u32 sw_if_index0, - vlib_node_runtime_t * node, - u32 next0, - u32 rx_fib_index0) -{ - snat_session_key_t key0, sm0; - icmp_echo_header_t *echo0; - u32 new_addr0, old_addr0; - u16 old_id0, new_id0; - ip_csum_t sum0; - snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; - - echo0 = (icmp_echo_header_t *)(icmp0+1); - - key0.addr = ip0->src_address; - key0.port = echo0->identifier; - key0.fib_index = rx_fib_index0; - - if (snat_static_mapping_match(sm, key0, &sm0, 0)) - { - if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, - IP_PROTOCOL_ICMP, rx_fib_index0))) - return next0; - - b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; - return SNAT_IN2OUT_NEXT_DROP; - } - - new_addr0 = sm0.addr.as_u32; - new_id0 = sm0.port; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; - old_addr0 = ip0->src_address.as_u32; - ip0->src_address.as_u32 = new_addr0; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, - src_address /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - - if (PREDICT_FALSE(new_id0 != echo0->identifier)) - { - old_id0 = echo0->identifier; - echo0->identifier = new_id0; - - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); - } - - return next0; -} - static uword snat_in2out_fast_static_map_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1635,8 +2225,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, proto0, rx_fib_index0))) goto trace0; - next0 = icmp_in2out_static_map - (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, ~0, 0); goto trace0; } @@ -1667,8 +2257,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0,