2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
15 #include <nat/dslite/dslite.h>
16 #include <nat/nat_inlines.h>
17 #include <nat/nat_syslog.h>
/*
 * Next-node dispositions used by the DS-Lite in2out nodes.  The values
 * index the next-node tables of the node registrations in this file:
 *   IP4_LOOKUP - default disposition of a packet (initial value of next0)
 *   IP6_ICMP   - outer IPv6 packet whose protocol is ICMPv6
 *   DROP       - packet rejected with an error
 *   SLOWPATH   - packet handed to "dslite-in2out-slowpath"
 */
21 DSLITE_IN2OUT_NEXT_IP4_LOOKUP,
22 DSLITE_IN2OUT_NEXT_IP6_ICMP,
23 DSLITE_IN2OUT_NEXT_DROP,
24 DSLITE_IN2OUT_NEXT_SLOWPATH,
26 } dslite_in2out_next_t;
/*
 * Error strings shared by both node registrations below (.error_strings).
 * The _(sym,string) helper keeps only the string of each entry.
 */
28 static char *dslite_in2out_error_strings[] = {
29 #define _(sym,string) string,
/*
 * Slow path: set up a session for in2out_key and bind it to an outside
 * IPv4 address/port taken from dm->pool.
 *
 * The B4 owning the session is found (or created) by softwire id in the
 * per-thread b4_hash.  Once a B4 holds 1000 sessions, the session at the
 * head of its list is reused; otherwise a new session is allocated and
 * appended to the B4's list.  The session is indexed in the per-thread
 * in2out and out2in hashes and the new mapping is logged.
 *
 * dm           - DS-Lite main structure
 * in2out_key   - inside key: softwire id, IPv4 address, port, protocol
 * sp           - presumably receives the session pointer (TODO confirm)
 * next         - presumably returned unchanged on success (TODO confirm)
 * error        - set to DSLITE_ERROR_OUT_OF_PORTS when no port is available
 * thread_index - selects the per-thread pools and hashes
 *
 * Returns DSLITE_IN2OUT_NEXT_DROP when address/port allocation fails for a
 * new session.
 */
35 slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key,
36 dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index)
39 clib_bihash_kv_16_8_t b4_kv, b4_value;
40 clib_bihash_kv_24_8_t in2out_kv;
41 clib_bihash_kv_8_8_t out2in_kv;
42 dlist_elt_t *head_elt, *oldest_elt, *elt;
45 snat_session_key_t out2in_key;
46 nat_ip4_addr_port_t addr_port;
/* The outside key uses the inside key's protocol and FIB index 0. */
49 out2in_key.protocol = in2out_key->proto;
50 out2in_key.fib_index = 0;
/* A B4 is identified by the 128-bit softwire id. */
52 b4_kv.key[0] = in2out_key->softwire_id.as_u64[0];
53 b4_kv.key[1] = in2out_key->softwire_id.as_u64[1];
/* When the lookup does not find the B4, create it. */
55 if (clib_bihash_search_16_8
56 (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value))
58 pool_get (dm->per_thread_data[thread_index].b4s, b4);
59 clib_memset (b4, 0, sizeof (*b4));
60 b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0];
61 b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1];
/* Every B4 owns a list head element that anchors its sessions. */
63 pool_get (dm->per_thread_data[thread_index].list_pool, head_elt);
64 b4->sessions_per_b4_list_head_index =
65 head_elt - dm->per_thread_data[thread_index].list_pool;
66 clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
67 b4->sessions_per_b4_list_head_index);
/* The hash value is the B4's index in the b4s pool. */
69 b4_index = b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s;
70 clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash,
73 vlib_set_simple_counter (&dm->total_b4s, thread_index, 0,
75 per_thread_data[thread_index].b4s));
/* Known B4: fetch it by the pool index stored in the hash. */
79 b4_index = b4_value.value;
81 pool_elt_at_index (dm->per_thread_data[thread_index].b4s,
/*
 * Per-B4 session quota: at 1000 sessions the element at the head of the
 * B4's list is removed and re-appended at the tail, and the session it
 * refers to is reused for the new flow.
 */
85 //TODO configurable quota
86 if (b4->nsessions >= 1000)
89 clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool,
90 b4->sessions_per_b4_list_head_index);
91 ASSERT (oldest_index != ~0);
92 clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
93 b4->sessions_per_b4_list_head_index, oldest_index);
95 pool_elt_at_index (dm->per_thread_data[thread_index].list_pool,
98 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
/*
 * Hash entries of the session being reused.
 * NOTE(review): presumably these two calls delete the old entries
 * (is_add == 0) - confirm.
 */
101 in2out_kv.key[0] = s->in2out.as_u64[0];
102 in2out_kv.key[1] = s->in2out.as_u64[1];
103 in2out_kv.key[2] = s->in2out.as_u64[2];
104 clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
106 out2in_kv.key = s->out2in.as_u64;
107 clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
/* Give the old outside address/port back and log the mapping removal. */
110 addr_port.addr.as_u32 = s->out2in.addr.as_u32;
111 addr_port.port = s->out2in.port;
113 nat_free_ip4_addr_and_port (&dm->pool, thread_index,
114 s->out2in.protocol, &addr_port);
116 nat_syslog_dslite_apmdel (b4_index, &s->in2out.softwire_id,
117 &s->in2out.addr, s->in2out.port,
118 &s->out2in.addr, s->out2in.port,
/* Outside address/port for the reused session. */
121 if (nat_alloc_ip4_addr_and_port
122 (&dm->pool, 0, thread_index, thread_index,
123 dm->port_per_thread, out2in_key.protocol, &addr_port))
126 out2in_key.addr.as_u32 = addr_port.addr.as_u32;
127 out2in_key.port = addr_port.port;
/*
 * New-session path: allocate an outside address/port; the packet is
 * dropped with DSLITE_ERROR_OUT_OF_PORTS when none is available.
 */
131 if (nat_alloc_ip4_addr_and_port
132 (&dm->pool, 0, thread_index, thread_index,
133 dm->port_per_thread, out2in_key.protocol, &addr_port))
135 *error = DSLITE_ERROR_OUT_OF_PORTS;
136 return DSLITE_IN2OUT_NEXT_DROP;
139 out2in_key.addr.as_u32 = addr_port.addr.as_u32;
140 out2in_key.port = addr_port.port;
/* Allocate and zero the session. */
142 pool_get (dm->per_thread_data[thread_index].sessions, s);
143 clib_memset (s, 0, sizeof (*s));
/*
 * Append the session to its B4's list; the list element stores the
 * session's pool index and the session remembers its element and head.
 */
146 pool_get (dm->per_thread_data[thread_index].list_pool, elt);
147 clib_dlist_init (dm->per_thread_data[thread_index].list_pool,
148 elt - dm->per_thread_data[thread_index].list_pool);
149 elt->value = s - dm->per_thread_data[thread_index].sessions;
150 s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool;
151 s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index;
152 clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
153 s->per_b4_list_head_index,
154 elt - dm->per_thread_data[thread_index].list_pool);
/* Publish the number of sessions in this thread's pool. */
156 vlib_set_simple_counter (&dm->total_sessions, thread_index, 0,
157 pool_elts (dm->per_thread_data
158 [thread_index].sessions));
/*
 * Store both keys in the session and index it in both hashes; the hash
 * value is the session's index in the sessions pool.
 */
161 s->in2out = *in2out_key;
162 s->out2in = out2in_key;
164 in2out_kv.key[0] = s->in2out.as_u64[0];
165 in2out_kv.key[1] = s->in2out.as_u64[1];
166 in2out_kv.key[2] = s->in2out.as_u64[2];
167 in2out_kv.value = s - dm->per_thread_data[thread_index].sessions;
168 clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out,
170 out2in_kv.key = s->out2in.as_u64;
171 out2in_kv.value = s - dm->per_thread_data[thread_index].sessions;
172 clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in,
/* Log the new address/port mapping. */
175 nat_syslog_dslite_apmadd (b4_index, &s->in2out.softwire_id, &s->in2out.addr,
176 s->in2out.port, &s->out2in.addr, s->out2in.port,
183 dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6,
184 ip4_header_t * ip4, dslite_session_t ** sp, u32 next,
185 u8 * error, u32 thread_index)
187 dslite_session_t *s = 0;
188 icmp46_header_t *icmp = ip4_next_header (ip4);
189 clib_bihash_kv_24_8_t kv, value;
190 dslite_session_key_t key;
192 icmp_echo_header_t *echo;
193 u32 new_addr, old_addr;
197 if (icmp_type_is_error_message (icmp->type))
199 n = DSLITE_IN2OUT_NEXT_DROP;
200 *error = DSLITE_ERROR_BAD_ICMP_TYPE;
204 echo = (icmp_echo_header_t *) (icmp + 1);
206 key.addr = ip4->src_address;
207 key.port = echo->identifier;
208 key.proto = SNAT_PROTOCOL_ICMP;
209 key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0];
210 key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1];
212 kv.key[0] = key.as_u64[0];
213 kv.key[1] = key.as_u64[1];
214 kv.key[2] = key.as_u64[2];
216 if (clib_bihash_search_24_8
217 (&dm->per_thread_data[thread_index].in2out, &kv, &value))
219 n = slow_path (dm, &key, &s, next, error, thread_index);
220 if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP))
226 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
230 old_addr = ip4->src_address.as_u32;
231 ip4->src_address = s->out2in.addr;
232 new_addr = ip4->src_address.as_u32;
234 sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
235 ip4->checksum = ip_csum_fold (sum);
237 old_id = echo->identifier;
238 echo->identifier = new_id = s->out2in.port;
239 sum = icmp->checksum;
240 sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier);
241 icmp->checksum = ip_csum_fold (sum);
/*
 * Shared worker of the "dslite-in2out" and "dslite-in2out-slowpath" nodes.
 *
 * For every buffer in the frame the outer IPv6 header is inspected:
 * packets that are not IP-in-IP go to DSLITE_IN2OUT_NEXT_IP6_ICMP (ICMPv6)
 * or are dropped.  The session is looked up in the per-thread in2out hash
 * by inner IPv4 source address/port and the IPv6 source address (softwire
 * id).  With a session at hand the inner IPv4 source address and the
 * TCP/UDP source port are rewritten from the session's out2in key,
 * checksums are patched, last_heard/total_bytes and the per-B4 LRU list are
 * refreshed, and the buffer is advanced past the IPv6 header.
 *
 * vm, node, frame - usual node function arguments
 * is_slow_path    - selects the node whose error counters are used.
 *                   NOTE(review): presumably it also decides whether
 *                   slow_path ()/dslite_icmp_in2out () are called or the
 *                   packet is handed to DSLITE_IN2OUT_NEXT_SLOWPATH -
 *                   confirm against the guarding conditions.
 *
 * Returns frame->n_vectors.
 */
249 dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
250 vlib_frame_t * frame, u8 is_slow_path)
252 u32 n_left_from, *from, *to_next;
253 dslite_in2out_next_t next_index;
255 vlib_node_runtime_t *error_node;
256 u32 thread_index = vm->thread_index;
257 f64 now = vlib_time_now (vm);
258 dslite_main_t *dm = &dslite_main;
261 is_slow_path ? dm->dslite_in2out_slowpath_node_index :
262 dm->dslite_in2out_node_index;
/* Errors are attributed to the runtime of the node selected above. */
264 error_node = vlib_node_get_runtime (vm, node_index);
266 from = vlib_frame_vector_args (frame);
267 n_left_from = frame->n_vectors;
268 next_index = node->cached_next_index;
270 while (n_left_from > 0)
274 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
276 while (n_left_from > 0 && n_left_to_next > 0)
280 u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP;
283 u8 error0 = DSLITE_ERROR_IN2OUT;
285 dslite_session_t *s0 = 0;
286 clib_bihash_kv_24_8_t kv0, value0;
287 dslite_session_key_t key0;
291 u32 new_addr0, old_addr0;
292 u16 old_port0, new_port0;
294 /* speculatively enqueue b0 to the current next frame */
/* The buffer starts at the outer IPv6 header. */
302 b0 = vlib_get_buffer (vm, bi0);
303 ip60 = vlib_buffer_get_current (b0);
/* Only IP-in-IP is translated; ICMPv6 is handed on, the rest dropped. */
305 if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP))
307 if (ip60->protocol == IP_PROTOCOL_ICMP6)
309 next0 = DSLITE_IN2OUT_NEXT_IP6_ICMP;
312 error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL;
313 next0 = DSLITE_IN2OUT_NEXT_DROP;
/* The inner IPv4 header directly follows the fixed IPv6 header. */
317 ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t);
318 proto0 = ip_proto_to_snat_proto (ip40->protocol);
/* ~0 marks an inner protocol that cannot be translated. */
320 if (PREDICT_FALSE (proto0 == ~0))
322 error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL;
323 next0 = DSLITE_IN2OUT_NEXT_DROP;
/* TCP and UDP are accessed through the same L4 header pointer. */
327 udp0 = ip4_next_header (ip40);
328 tcp0 = (tcp_header_t *) udp0;
/* ICMP is translated by dslite_icmp_in2out (). */
332 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
335 dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0,
337 if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
/* ICMP is passed to the slow-path node here. */
345 if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
347 next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
/* Session key: inner source address/port plus softwire id. */
352 key0.addr = ip40->src_address;
353 key0.port = udp0->src_port;
355 key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0];
356 key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1];
358 kv0.key[0] = key0.as_u64[0];
359 kv0.key[1] = key0.as_u64[1];
360 kv0.key[2] = key0.as_u64[2];
/* No session found: create one via slow_path () or defer to the
   slow-path node. */
362 if (clib_bihash_search_24_8
363 (&dm->per_thread_data[thread_index].in2out, &kv0, &value0))
368 slow_path (dm, &key0, &s0, next0, &error0, thread_index);
369 if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP))
374 next0 = DSLITE_IN2OUT_NEXT_SLOWPATH;
381 pool_elt_at_index (dm->per_thread_data[thread_index].sessions,
/* Rewrite the IPv4 source address and patch the IPv4 header checksum. */
385 old_addr0 = ip40->src_address.as_u32;
386 ip40->src_address = s0->out2in.addr;
387 new_addr0 = ip40->src_address.as_u32;
388 sum0 = ip40->checksum;
390 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
392 ip40->checksum = ip_csum_fold (sum0);
/* TCP: rewrite the source port; the TCP checksum is patched for both the
   address and the port change. */
393 if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP))
395 old_port0 = tcp0->src_port;
396 tcp0->src_port = s0->out2in.port;
397 new_port0 = tcp0->src_port;
399 sum0 = tcp0->checksum;
401 ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
404 ip_csum_update (sum0, old_port0, new_port0, ip4_header_t,
406 //mss_clamping (&dslite_main, tcp0, &sum0);
407 tcp0->checksum = ip_csum_fold (sum0);
/* Otherwise rewrite the UDP source port. */
411 old_port0 = udp0->src_port;
412 udp0->src_port = s0->out2in.port;
/* Session accounting. */
418 s0->last_heard = now;
420 s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
421 /* Per-B4 LRU list maintenance */
422 clib_dlist_remove (dm->per_thread_data[thread_index].list_pool,
424 clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool,
425 s0->per_b4_list_head_index, s0->per_b4_index);
428 (clib_net_to_host_u32
429 (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >>
/* Strip the outer IPv6 header; the buffer now starts at the IPv4 header. */
431 vlib_buffer_advance (b0, sizeof (ip6_header_t));
/* Trace record: next index plus the session's pool index (~0 by default). */
434 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
435 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
437 dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
438 t->next_index = next0;
439 t->session_index = ~0;
442 s0 - dm->per_thread_data[thread_index].sessions;
445 b0->error = error_node->errors[error0];
447 /* verify speculative enqueue, maybe switch current next frame */
448 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
449 n_left_to_next, bi0, next0);
451 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
454 return frame->n_vectors;
/* Node function of "dslite-in2out": runs the shared worker with
   is_slow_path = 0. */
457 VLIB_NODE_FN (dslite_in2out_node) (vlib_main_t * vm,
458 vlib_node_runtime_t * node,
459 vlib_frame_t * frame)
461 return dslite_in2out_node_fn_inline (vm, node, frame, 0);
/*
 * Registration of the "dslite-in2out" internal node: u32 buffer-index
 * vectors, DS-Lite trace formatter, the shared error strings, and one next
 * node per dslite_in2out_next_t value.
 */
465 VLIB_REGISTER_NODE (dslite_in2out_node) = {
466 .name = "dslite-in2out",
467 .vector_size = sizeof (u32),
468 .format_trace = format_dslite_trace,
469 .type = VLIB_NODE_TYPE_INTERNAL,
470 .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
471 .error_strings = dslite_in2out_error_strings,
472 .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
473 /* edit / add dispositions here */
475 [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
476 [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
477 [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
478 [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
/* Node function of "dslite-in2out-slowpath": runs the shared worker with
   is_slow_path = 1. */
483 VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
484 vlib_node_runtime_t * node,
485 vlib_frame_t * frame)
487 return dslite_in2out_node_fn_inline (vm, node, frame, 1);
/*
 * Registration of the "dslite-in2out-slowpath" internal node: u32
 * buffer-index vectors, DS-Lite trace formatter, the shared error strings,
 * and one next node per dslite_in2out_next_t value.
 *
 * NOTE(review): DSLITE_IN2OUT_NEXT_IP6_ICMP maps to "ip6-lookup" here but
 * to "ip6-icmp-input" in the "dslite-in2out" registration - confirm the
 * difference is intended.
 */
491 VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
492 .name = "dslite-in2out-slowpath",
493 .vector_size = sizeof (u32),
494 .format_trace = format_dslite_trace,
495 .type = VLIB_NODE_TYPE_INTERNAL,
496 .n_errors = ARRAY_LEN (dslite_in2out_error_strings),
497 .error_strings = dslite_in2out_error_strings,
498 .n_next_nodes = DSLITE_IN2OUT_N_NEXT,
499 /* edit / add dispositions here */
501 [DSLITE_IN2OUT_NEXT_DROP] = "error-drop",
502 [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
503 [DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-lookup",
504 [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
510 * fd.io coding-style-patch-verification: ON
513 * eval: (c-set-style "gnu")