X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fsnat%2Fin2out.c;h=4abf8758af4a5572b71d7b5034b3e8a7af46ced5;hb=80f54e20270ed0628ee725e3e3c515731a0188f2;hp=b0047737121a1f21951760d0d9d62ec148394653;hpb=31c31aa3b68f434e047309224ce0923600a59e16;p=vpp.git diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index b0047737121..4abf8758af4 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,7 @@ vlib_node_registration_t snat_in2out_node; vlib_node_registration_t snat_in2out_slowpath_node; vlib_node_registration_t snat_in2out_fast_node; vlib_node_registration_t snat_in2out_worker_handoff_node; +vlib_node_registration_t snat_det_in2out_node; #define foreach_snat_in2out_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -112,6 +114,7 @@ typedef enum { SNAT_IN2OUT_NEXT_LOOKUP, SNAT_IN2OUT_NEXT_DROP, SNAT_IN2OUT_NEXT_SLOW_PATH, + SNAT_IN2OUT_NEXT_ICMP_ERROR, SNAT_IN2OUT_N_NEXT, } snat_in2out_next_t; @@ -246,6 +249,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, pool_get (sm->per_thread_data[cpu_index].users, u); memset (u, 0, sizeof (*u)); u->addr = ip0->src_address; + u->fib_index = rx_fib_index0; pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); @@ -314,7 +318,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, (sm, &s->out2in, s->outside_address_index); s->outside_address_index = ~0; - if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1, + &address_index)) { ASSERT(0); @@ -332,7 +337,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, { static_mapping = 0; /* Try to create dynamic translation */ - if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1, + &address_index)) { b0->error = 
node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; return SNAT_IN2OUT_NEXT_DROP; @@ -410,6 +416,10 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, return next0; } +typedef struct { + u16 src_port, dst_port; +} tcp_udp_header_t; + static inline u32 icmp_in2out_slow_path (snat_main_t *sm, vlib_buffer_t * b0, ip4_header_t * ip0, @@ -419,67 +429,171 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, vlib_node_runtime_t * node, u32 next0, f64 now, - u32 cpu_index) + u32 cpu_index, + snat_session_t ** p_s0) { snat_session_key_t key0; - icmp_echo_header_t *echo0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0 = 0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; clib_bihash_kv_8_8_t kv0, value0; - snat_session_t * s0; + snat_session_t * s0 = 0; u32 new_addr0, old_addr0; u16 old_id0, new_id0; ip_csum_t sum0; + u16 checksum0; snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + u8 is_error_message = 0; - if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) - { - b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; - return SNAT_IN2OUT_NEXT_DROP; - } - echo0 = (icmp_echo_header_t *)(icmp0+1); key0.addr = ip0->src_address; - key0.port = echo0->identifier; - key0.protocol = SNAT_PROTOCOL_ICMP; key0.fib_index = rx_fib_index0; + switch(icmp0->type) + { + case ICMP4_destination_unreachable: + case ICMP4_time_exceeded: + case ICMP4_parameter_problem: + case ICMP4_source_quench: + case ICMP4_redirect: + case ICMP4_alternate_host_address: + is_error_message = 1; + } + + if (!is_error_message) + { + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.port = echo0->identifier; + } + else + { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol); + switch (key0.protocol) 
+ { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + key0.port = inner_echo0->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + key0.port = ((tcp_udp_header_t*)l4_header)->dst_port; + break; + default: + b0->error = node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + } + kv0.key = key0.as_u64; if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) { if (PREDICT_FALSE(snat_not_translate(sm, rt, sw_if_index0, ip0, IP_PROTOCOL_ICMP, rx_fib_index0))) - return next0; + goto out; + + if (is_error_message) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0, cpu_index); if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) - return next0; + goto out; } else s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, value0.value); + sum0 = ip_incremental_checksum (0, icmp0, + ntohs(ip0->length) - ip4_header_bytes (ip0)); + checksum0 = ~ip_csum_fold (sum0); + if (PREDICT_FALSE(checksum0 != 0 && checksum0 != 0xffff)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; new_addr0 = ip0->src_address.as_u32; vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - + sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address /* changed member */); ip0->checksum = ip_csum_fold (sum0); - old_id0 = echo0->identifier; - new_id0 = s0->out2in.port; - echo0->identifier = new_id0; + if (!is_error_message) + { + old_id0 = echo0->identifier; + new_id0 = s0->out2in.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + else + { + 
if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, - identifier); - icmp0->checksum = ip_csum_fold (sum0); + old_addr0 = inner_ip0->dst_address.as_u32; + inner_ip0->dst_address = s0->out2in.addr; + new_addr0 = inner_ip0->dst_address.as_u32; /* read back the field just rewritten so the checksum delta matches the edit */ + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */); + icmp0->checksum = ip_csum_fold (sum0); + + switch (key0.protocol) + { + case SNAT_PROTOCOL_ICMP: + old_id0 = inner_echo0->identifier; + new_id0 = s0->out2in.port; + inner_echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t*)l4_header)->dst_port; + new_id0 = s0->out2in.port; + ((tcp_udp_header_t*)l4_header)->dst_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + dst_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT(0); + } + } /* Accounting */ s0->last_heard = now; @@ -495,6 +609,8 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm, s0->per_user_index); } +out: + *p_s0 = s0; return next0; } @@ -578,12 +694,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -685,6 +801,16 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, proto0 = ip_proto_to_snat_proto 
(ip0->protocol); + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace00; + } + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { @@ -695,7 +821,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, - node, next0, now, cpu_index); + node, next0, now, cpu_index, &s0); goto trace00; } } @@ -751,9 +877,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -815,6 +941,16 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, proto1 = ip_proto_to_snat_proto (ip1->protocol); + if (PREDICT_FALSE(ip1->ttl == 1)) /* second buffer of the pair: test its own IP header */ + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; /* steer b1; next0 belongs to the first buffer */ + goto trace01; + } + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { @@ -825,7 +961,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next1 = icmp_in2out_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now, cpu_index); + next1, now, cpu_index, &s1); goto trace01; } } @@ -881,9 +1017,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = 
ip_csum_update (sum1, old_addr1, new_addr1, @@ -980,6 +1116,16 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, proto0 = ip_proto_to_snat_proto (ip0->protocol); + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } + /* Next configured feature, probably ip4-lookup */ if (is_slow_path) { @@ -990,7 +1136,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { next0 = icmp_in2out_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now, cpu_index); + next0, now, cpu_index, &s0); goto trace0; } } @@ -1020,6 +1166,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, &s0, node, next0, cpu_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) goto trace0; } @@ -1046,9 +1193,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1141,6 +1288,7 @@ VLIB_REGISTER_NODE (snat_in2out_node) = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; @@ -1173,11 +1321,522 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); +/**************************/ +/*** deterministic mode 
***/ +/**************************/ +static uword +snat_det_in2out_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + u32 now = (u32) vlib_time_now (vm); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, lo_port0, i0; + u16 old_port1, new_port1, lo_port1, i1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + u32 proto0, proto1; + snat_det_out_key_t key0, key1; + snat_det_map_t * dm0, * dm1; + snat_det_session_t * ses0 = 0, * ses1 = 0; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + next0 = SNAT_IN2OUT_NEXT_DROP; /* drop instead of forwarding the untranslated packet to ip4-lookup */ + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace0; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i0 = 0; i0 < dm0->ports_per_host; i0++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace0; + } + } + + new_port0 = ses0->out.out_port; + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + 
vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & 
VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + dm1 = snat_det_map_by_user(sm, &ip1->src_address); + if (PREDICT_FALSE(!dm1)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip1->src_address); /* report b1's source, the host whose lookup failed */ + next1 = SNAT_IN2OUT_NEXT_DROP; /* drop instead of forwarding the untranslated packet to ip4-lookup */ + b1->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace1; + } + + snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1); + + + ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src); + if (PREDICT_FALSE(!ses1)) + { + key1.ext_host_addr = ip1->dst_address; + key1.ext_host_port = tcp1->dst; + for (i1 = 0; i1 < dm1->ports_per_host; i1++) + { + key1.out_port = clib_host_to_net_u16 (lo_port1 + + ((i1 + clib_net_to_host_u16 (tcp1->src)) % dm1->ports_per_host)); + + if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64)) + continue; + + ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1); + break; + } + if (PREDICT_FALSE(!ses1)) + { + next1 = SNAT_IN2OUT_NEXT_DROP; + b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace1; + } + } + + new_port1 = ses1->out.out_port; + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + old_addr1.as_u32 = ip1->src_address.as_u32; + ip1->src_address.as_u32 = new_addr1.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + src_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + if (tcp1->flags & TCP_FLAG_SYN) + 
ses1->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED) + ses1->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm1, ses1); + else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses1->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port1 = tcp1->src; + tcp1->src = new_port1; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + ses1->state = SNAT_SESSION_UDP_ACTIVE; + old_port1 = udp1->src_port; + udp1->src_port = new_port1; + udp1->checksum = 0; + } + + switch(ses1->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses1->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace1: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (ses1) + t->session_index = ses1 - dm1->sessions; + } + + pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative 
enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + ip4_address_t new_addr0, old_addr0; + u16 old_port0, new_port0, lo_port0, i0; + udp_header_t * udp0; + tcp_header_t * tcp0; + u32 proto0; + snat_det_out_key_t key0; + snat_det_map_t * dm0; + snat_det_session_t * ses0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + next0 = SNAT_IN2OUT_NEXT_DROP; /* drop instead of forwarding the untranslated packet to ip4-lookup */ + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace00; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i0 = 0; i0 < dm0->ports_per_host; i0++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + + ((i0 + clib_net_to_host_u16 (tcp0->src)) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + goto trace00; + } + } + + new_port0 = ses0->out.out_port; + proto0 = 
ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + 
SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace00: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_det_in2out_node.index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_det_in2out_node) = { + .function = snat_det_in2out_node_fn, + .name = "snat-det-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = 2, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn); + +/**********************/ +/*** worker handoff ***/ +/**********************/ static uword snat_in2out_worker_handoff_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1218,8 +1877,6 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 sw_if_index0; u32 rx_fib_index0; ip4_header_t * ip0; - snat_user_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u8 do_handoff; bi0 = from[0]; @@ -1233,28 +1890,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); - key0.addr = 
ip0->src_address; - key0.fib_index = rx_fib_index0; - - kv0.key = key0.as_u64; - - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - - /* add non-traslated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); - } - else - next_worker_index = value0.value; + next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0); if (PREDICT_FALSE (next_worker_index != cpu_index)) { @@ -1294,7 +1930,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, /* if this is 1st frame */ if (!f) { - f = vlib_get_frame_to_node (vm, snat_in2out_node.index); + f = vlib_get_frame_to_node (vm, sm->in2out_node_index); to_next = vlib_frame_vector_args (f); } @@ -1314,7 +1950,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, } if (f) - vlib_put_frame_to_node (vm, snat_in2out_node.index, f); + vlib_put_frame_to_node (vm, sm->in2out_node_index, f); if (hf) hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; @@ -1361,6 +1997,9 @@ VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); +/********************************/ +/*** static mapping only mode ***/ +/********************************/ static inline u32 icmp_in2out_static_map (snat_main_t *sm, vlib_buffer_t * b0, ip4_header_t * ip0, @@ -1523,8 +2162,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1604,6 +2243,7 @@ VLIB_REGISTER_NODE (snat_in2out_fast_node) 
= { [SNAT_IN2OUT_NEXT_DROP] = "error-drop", [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, };