2 * Copyright (c) 2020 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #ifndef __CNAT_NODE_H__
17 #define __CNAT_NODE_H__
19 #include <vlibmemory/api.h>
20 #include <vnet/dpo/load_balance.h>
21 #include <vnet/dpo/load_balance_map.h>
22 #include <vnet/ip/ip_psh_cksum.h>
24 #include <cnat/cnat_session.h>
25 #include <cnat/cnat_client.h>
26 #include <cnat/cnat_inline.h>
27 #include <cnat/cnat_translation.h>
29 #include <vnet/ip/ip4_inlines.h>
30 #include <vnet/ip/ip6_inlines.h>
/* Callback type for the per-packet CNAT handler that cnat_node_inline calls.
 * cnat_node_inline passes the session-table search result as `rv` and, as
 * `session`, the search output when rv is 0 or the lookup key otherwise.
 * The returned uword is stored as the packet's next index.
 * NOTE(review): call sites pass a vlib_buffer_t * as third argument; that
 * parameter line is not visible in this listing - confirm against the file. */
32 typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
33 vlib_node_runtime_t * node,
35 cnat_node_ctx_t * ctx, int rv,
36 cnat_session_t * session);
/* Per-packet trace record filled by cnat_add_trace and printed by
 * format_cnat_trace.
 * NOTE(review): both of those functions access t->flags, but no `flags`
 * member is visible in this listing - confirm it exists in the full file. */
38 typedef struct cnat_trace_element_t_
/* Copy of the session (only written when a session was found or created). */
40 cnat_session_t session;
/* Copy of the translation (only written when a translation was supplied). */
41 cnat_translation_t tr;
/* RX and TX software interface indices taken from the buffer metadata. */
42 u32 sw_if_index[VLIB_N_RX_TX];
43 u32 snat_policy_result;
45 } cnat_trace_element_t;
/* Bit flags describing what a trace record contains; values are distinct
 * bits so they can be OR-ed together and tested with '&'. */
47 typedef enum cnat_trace_element_flag_t_
49 CNAT_TRACE_SESSION_FOUND = (1 << 0),
50 CNAT_TRACE_SESSION_CREATED = (1 << 1),
51 CNAT_TRACE_TRANSLATION_FOUND = (1 << 2),
/* Printed as " [policy:skip]" by format_cnat_trace. */
52 CNAT_TRACE_NO_NAT = (1 << 3),
53 } cnat_trace_element_flag_t;
/* Append a cnat_trace_element_t to buffer b's trace.
 * Records the buffer's RX/TX sw_if_index, copies *session when the flags say
 * a session was found or created, and copies *ct when
 * CNAT_TRACE_TRANSLATION_FOUND is set.
 * NOTE(review): the `flags` parameter line and the condition guarding the
 * `flags |=` statement are not visible in this listing (presumably a check
 * that ct is non-NULL) - confirm. */
55 static_always_inline void
56 cnat_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
57 cnat_session_t *session, const cnat_translation_t *ct,
60 cnat_trace_element_t *t;
62 flags |= CNAT_TRACE_TRANSLATION_FOUND;
/* Reserve room for one trace element in the buffer's trace. */
64 t = vlib_add_trace (vm, node, b, sizeof (*t));
65 t->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
66 t->sw_if_index[VLIB_TX] = vnet_buffer (b)->sw_if_index[VLIB_TX];
/* The session is only read when one of the session flags is set. */
68 if (flags & (CNAT_TRACE_SESSION_FOUND | CNAT_TRACE_SESSION_CREATED))
69 clib_memcpy (&t->session, session, sizeof (t->session));
/* Likewise the translation is only read when flagged as present. */
70 if (flags & CNAT_TRACE_TRANSLATION_FOUND)
71 clib_memcpy (&t->tr, ct, sizeof (cnat_translation_t));
/* Format callback that renders a cnat_trace_element_t.
 * Consumes three va_args (vlib_main_t *, vlib_node_t *, trace element) and
 * appends to s: the session status, an optional " [policy:skip]" marker, the
 * in/out interface names, then the session and translation when the
 * corresponding flags are set.
 * NOTE(review): the return type line and the final return are not visible in
 * this listing. */
76 format_cnat_trace (u8 *s, va_list *args)
/* The first two arguments are fetched only to advance the va_list. */
78 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
79 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
80 cnat_trace_element_t *t = va_arg (*args, cnat_trace_element_t *);
/* Indent captured once and reused for every continuation line below. */
81 u32 indent = format_get_indent (s);
82 vnet_main_t *vnm = vnet_get_main ();
/* "created" is tested before "found". */
84 if (t->flags & CNAT_TRACE_SESSION_CREATED)
85 s = format (s, "created session");
86 else if (t->flags & CNAT_TRACE_SESSION_FOUND)
87 s = format (s, "found session");
89 s = format (s, "session not found");
91 if (t->flags & (CNAT_TRACE_NO_NAT))
92 s = format (s, " [policy:skip]");
94 s = format (s, "\n%Uin:%U out:%U ", format_white_space, indent,
95 format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_RX],
96 format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_TX]);
/* Session/translation copies are only meaningful when flagged, matching
 * what cnat_add_trace stored. */
98 if (t->flags & (CNAT_TRACE_SESSION_CREATED | CNAT_TRACE_SESSION_FOUND))
99 s = format (s, "\n%U%U", format_white_space, indent, format_cnat_session,
102 if (t->flags & CNAT_TRACE_TRANSLATION_FOUND)
103 s = format (s, "\n%Utranslation: %U", format_white_space, indent,
104 format_cnat_translation, &t->tr, 0);
/* Classify an ICMPv4 type as an error message.
 * The cases listed below are the types treated as errors.
 * NOTE(review): the switch statement, the return values and possibly further
 * case labels are not visible in this listing - confirm the full list. */
109 static_always_inline u8
110 icmp_type_is_error_message (u8 icmp_type)
114 case ICMP4_destination_unreachable:
115 case ICMP4_time_exceeded:
116 case ICMP4_parameter_problem:
117 case ICMP4_source_quench:
119 case ICMP4_alternate_host_address:
/* Classify an ICMPv4 type as echo traffic (request or reply).
 * NOTE(review): the switch and return statements are not visible in this
 * listing. */
125 static_always_inline u8
126 icmp_type_is_echo (u8 icmp_type)
130 case ICMP4_echo_request:
131 case ICMP4_echo_reply:
/* Classify an ICMPv6 type as echo traffic (request or reply).
 * NOTE(review): the switch and return statements are not visible in this
 * listing. */
137 static_always_inline u8
138 icmp6_type_is_echo (u8 icmp_type)
142 case ICMP6_echo_request:
143 case ICMP6_echo_reply:
/* Classify an ICMPv6 type as an error message.
 * NOTE(review): the switch and return statements are not visible in this
 * listing; confirm whether further case labels exist. */
149 static_always_inline u8
150 icmp6_type_is_error_message (u8 icmp_type)
154 case ICMP6_destination_unreachable:
155 case ICMP6_time_exceeded:
156 case ICMP6_parameter_problem:
/* Return non-zero when the two IPv6 addresses are equal.
 * Despite the "cmp" name this is an equality test, not an ordering: both
 * 64-bit halves must match. */
162 static_always_inline u8
163 cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
165 return ((a1->as_u64[0] == a2->as_u64[0])
166 && (a1->as_u64[1] == a2->as_u64[1]));
170 * Inline translation functions
/* Compute the checksum of an IPv4 pseudo header built from the supplied
 * addresses instead of the ones currently in the packet.
 * address[VLIB_RX] is used as source and address[VLIB_TX] as destination;
 * protocol and length come from ip4. Returns the bitwise complement of
 * clib_ip_csum over the pseudo header. */
173 static_always_inline u16
174 ip4_pseudo_header_cksum2 (ip4_header_t *ip4, ip4_address_t address[VLIB_N_DIR])
176 ip4_psh_t psh = { 0 };
177 psh.src = address[VLIB_RX];
178 psh.dst = address[VLIB_TX];
179 psh.proto = ip4->protocol;
/* L4 length = total length minus the fixed header size; IP options are not
 * accounted for by this subtraction. */
180 psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
181 sizeof (ip4_header_t));
182 return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
/* Rewrite the L4 ports of a TCP/UDP header and update *sum accordingly.
 * udp is also used for TCP by callers, which only relies on the source and
 * destination port fields. Ports are always overwritten with new_port[].
 * Two checksum paths are visible:
 *  - a test on the TCP/UDP checksum-offload bits that sets *sum to the
 *    pseudo-header checksum computed with the new addresses;
 *  - an incremental update of *sum for both addresses and both ports.
 * The IPv4 addresses themselves are not modified here (ip4 is only read).
 * NOTE(review): the `if (oflags & ...)` line and the early return that
 * separate the two paths are not visible in this listing - confirm. */
185 static_always_inline void
186 cnat_ip4_translate_l4 (ip4_header_t *ip4, udp_header_t *udp, ip_csum_t *sum,
187 ip4_address_t new_addr[VLIB_N_DIR],
188 u16 new_port[VLIB_N_DIR], u32 oflags)
/* Remember the original ports: they are needed for the incremental update
 * after the header has been overwritten. */
190 u16 old_port[VLIB_N_DIR];
191 old_port[VLIB_TX] = udp->dst_port;
192 old_port[VLIB_RX] = udp->src_port;
194 udp->dst_port = new_port[VLIB_TX];
195 udp->src_port = new_port[VLIB_RX];
198 (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
200 *sum = ip4_pseudo_header_cksum2 (ip4, new_addr);
/* Incremental path: ip4 still holds the old addresses at this point. */
204 *sum = ip_csum_update (*sum, ip4->dst_address.as_u32,
205 new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
206 *sum = ip_csum_update (*sum, ip4->src_address.as_u32,
207 new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
209 *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
210 udp_header_t, dst_port);
211 *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
212 udp_header_t, src_port);
/* Rewrite the ports of an SCTP header carried over IPv4.
 * When the packet's checksum field is 0 both ports are overwritten
 * unconditionally. Otherwise each port is only replaced when the new value is
 * non-zero, and the checksum is recomputed with CRC32c over the L4 length
 * (IPv4 total length minus the fixed header size).
 * NOTE(review): the return ending the fast path and any reset of
 * sctp->checksum before the CRC computation are not visible in this
 * listing - confirm. */
215 static_always_inline void
216 cnat_ip4_translate_sctp (ip4_header_t *ip4, sctp_header_t *sctp,
217 u16 new_port[VLIB_N_DIR])
219 /* Fastpath no checksum */
220 if (PREDICT_TRUE (0 == sctp->checksum))
222 sctp->dst_port = new_port[VLIB_TX];
223 sctp->src_port = new_port[VLIB_RX];
/* Slow path: a zero new port leaves the existing port untouched. */
227 if (new_port[VLIB_TX])
228 sctp->dst_port = new_port[VLIB_TX];
229 if (new_port[VLIB_RX])
230 sctp->src_port = new_port[VLIB_RX];
/* CRC32c seeded with ~0, complemented, stored via a host-to-little-endian
 * conversion. */
233 sctp->checksum = clib_host_to_little_u32 (~clib_crc32c_with_init (
234 (u8 *) sctp, ntohs (ip4->length) - sizeof (ip4_header_t),
235 ~0 /* init value */));
/* Rewrite the source and destination addresses of an IPv4 header and update
 * its header checksum incrementally.
 * new_addr[VLIB_TX] becomes the destination, new_addr[VLIB_RX] the source.
 * NOTE(review): the last parameter line (callers pass oflags), the
 * declaration of `sum` and its initial value are not visible in this
 * listing - confirm. */
238 static_always_inline void
239 cnat_ip4_translate_l3 (ip4_header_t *ip4, ip4_address_t new_addr[VLIB_N_DIR],
242 ip4_address_t old_addr[VLIB_N_DIR];
/* Save the original addresses before overwriting them; they feed the
 * incremental checksum update below. */
244 old_addr[VLIB_TX] = ip4->dst_address;
245 old_addr[VLIB_RX] = ip4->src_address;
247 ip4->dst_address = new_addr[VLIB_TX];
248 ip4->src_address = new_addr[VLIB_RX];
250 // We always compute the IP checksum even if oflags &
251 // VNET_BUFFER_OFFLOAD_F_IP_CKSUM is set as this is relatively inexpensive
252 // and will allow avoiding issues in driver that do not behave properly
255 sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
256 new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
257 sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
258 new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
259 ip4->checksum = ip_csum_fold (sum);
/* Adjust the lifetime of the timestamp entry `index` based on TCP flags.
 * FIN and RST both apply CNAT_DEFAULT_TCP_RST_TIMEOUT; a SYN+ACK applies
 * cm->tcp_max_age. The three tests are independent `if`s, so when several
 * match the later call is the last one made. */
262 static_always_inline void
263 cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
265 cnat_main_t *cm = &cnat_main;
/* FIN uses the same timeout constant as RST. */
266 if (PREDICT_FALSE (tcp_fin (tcp)))
267 cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
269 if (PREDICT_FALSE (tcp_rst (tcp)))
270 cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
272 if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
273 cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
/* Translate an ICMPv4 echo request/reply.
 * Rewrites the outer IPv4 addresses via cnat_ip4_translate_l3, replaces the
 * echo identifier with new_port[VLIB_RX], and refreshes the ICMP checksum.
 * Only new_port[VLIB_RX] is used; new_port[VLIB_TX] is ignored.
 * NOTE(review): the declarations of `sum` and `old_port` are not visible in
 * this listing. */
276 static_always_inline void
277 cnat_translation_icmp4_echo (ip4_header_t *ip4, icmp46_header_t *icmp,
278 ip4_address_t new_addr[VLIB_N_DIR],
279 u16 new_port[VLIB_N_DIR], u32 oflags)
/* The echo header follows immediately after the ICMP header. */
283 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
285 cnat_ip4_translate_l3 (ip4, new_addr, oflags);
286 old_port = echo->identifier;
287 echo->identifier = new_port[VLIB_RX];
289 sum = icmp->checksum;
/* NOTE(review): as listed, the result of ip_csum_update is not assigned; the
 * preceding source line is missing from this listing and presumably holds
 * `sum =` - confirm, otherwise the identifier change is not reflected in the
 * checksum. */
291 ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
293 icmp->checksum = ip_csum_fold (sum);
/* Translate an ICMPv4 error message together with the packet embedded in it.
 * The embedded IPv4 header is located at (icmp + 2), followed by its TCP/UDP
 * header. The embedded packet travels in the opposite direction, so its new
 * addresses and ports are the outer ones with RX and TX swapped.
 * Steps visible here:
 *  1. save the inner addresses, ports and header checksum;
 *  2. translate the outer IPv4 header;
 *  3. translate the inner TCP/UDP header and store its folded checksum;
 *  4. update the ICMP checksum for every inner field that changed
 *     (L4 checksum, ports, IP checksum, addresses).
 * outer_new_addr[VLIB_RX] may be overwritten by this function.
 * NOTE(review): the last parameter line, the condition guarding the
 * assignment to outer_new_addr[VLIB_RX] (presumably on snat_outer_ip) and the
 * branch for other inner protocols are not visible in this listing. */
296 static_always_inline void
297 cnat_translation_icmp4_error (ip4_header_t *outer_ip4, icmp46_header_t *icmp,
298 ip4_address_t outer_new_addr[VLIB_N_DIR],
299 u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip,
302 ip4_address_t new_addr[VLIB_N_DIR];
303 ip4_address_t old_addr[VLIB_N_DIR];
304 u16 new_port[VLIB_N_DIR];
305 u16 old_port[VLIB_N_DIR];
306 ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
/* Embedded packet: skip two icmp46_header_t-sized units past the ICMP
 * header start. */
308 ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
309 udp_header_t *udp = (udp_header_t *) (ip4 + 1);
310 tcp_header_t *tcp = (tcp_header_t *) udp;
312 /* Swap inner ports */
313 new_addr[VLIB_TX] = outer_new_addr[VLIB_RX];
314 new_addr[VLIB_RX] = outer_new_addr[VLIB_TX];
315 new_port[VLIB_TX] = outer_new_port[VLIB_RX];
316 new_port[VLIB_RX] = outer_new_port[VLIB_TX];
/* Snapshot the inner values before they are rewritten. */
318 old_addr[VLIB_TX] = ip4->dst_address;
319 old_addr[VLIB_RX] = ip4->src_address;
320 old_port[VLIB_RX] = udp->src_port;
321 old_port[VLIB_TX] = udp->dst_port;
323 sum = icmp->checksum;
324 old_ip_sum = ip4->checksum;
326 /* translate outer ip. */
/* Assigning the current source back makes the outer source rewrite a
 * no-op. */
328 outer_new_addr[VLIB_RX] = outer_ip4->src_address;
329 cnat_ip4_translate_l3 (outer_ip4, outer_new_addr, oflags);
331 if (ip4->protocol == IP_PROTOCOL_TCP)
333 inner_l4_old_sum = inner_l4_sum = tcp->checksum;
334 cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
336 tcp->checksum = ip_csum_fold (inner_l4_sum);
338 else if (ip4->protocol == IP_PROTOCOL_UDP)
340 inner_l4_old_sum = inner_l4_sum = udp->checksum;
341 cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
343 udp->checksum = ip_csum_fold (inner_l4_sum);
348 /* UDP/TCP checksum changed */
349 sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
350 ip4_header_t, checksum);
352 /* UDP/TCP Ports changed */
/* A port is only folded into the ICMP checksum when both its old and new
 * values are non-zero. */
353 if (old_port[VLIB_TX] && new_port[VLIB_TX])
354 sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
355 udp_header_t, dst_port);
357 if (old_port[VLIB_RX] && new_port[VLIB_RX])
358 sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
359 udp_header_t, src_port);
/* Rewrite the inner addresses last; the L4 translation above read the old
 * addresses from ip4. */
361 cnat_ip4_translate_l3 (ip4, new_addr, 0 /* oflags */);
362 ip_csum_t new_ip_sum = ip4->checksum;
363 /* IP checksum changed */
364 sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
366 /* IP src/dst addr changed */
367 sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
368 new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
370 sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
371 new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
373 icmp->checksum = ip_csum_fold (sum);
/* Apply a session's translation to an IPv4 packet.
 * The new addresses and ports are taken from session->value and the rewrite
 * is dispatched on ip4->protocol: TCP, UDP, SCTP, or ICMP (error messages
 * and echo). `udp` points at the L4 header and is reinterpreted per protocol.
 * For TCP the session lifetime is also refreshed from the TCP flags.
 * No branch for other protocols is visible in this listing. */
376 static_always_inline void
377 cnat_translation_ip4 (const cnat_session_t *session, ip4_header_t *ip4,
378 udp_header_t *udp, u32 oflags)
380 tcp_header_t *tcp = (tcp_header_t *) udp;
381 ip4_address_t new_addr[VLIB_N_DIR];
382 u16 new_port[VLIB_N_DIR];
384 new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4;
385 new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4;
386 new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
387 new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
389 if (ip4->protocol == IP_PROTOCOL_TCP)
391 ip_csum_t sum = tcp->checksum;
/* L4 before L3: the L4 helper reads the old addresses from ip4. */
392 cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
393 tcp->checksum = ip_csum_fold (sum);
394 cnat_ip4_translate_l3 (ip4, new_addr, oflags);
395 cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
397 else if (ip4->protocol == IP_PROTOCOL_UDP)
399 ip_csum_t sum = udp->checksum;
400 cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
401 udp->checksum = ip_csum_fold (sum);
402 cnat_ip4_translate_l3 (ip4, new_addr, oflags);
404 else if (ip4->protocol == IP_PROTOCOL_SCTP)
406 sctp_header_t *sctp = (sctp_header_t *) udp;
407 cnat_ip4_translate_sctp (ip4, sctp, new_port);
408 cnat_ip4_translate_l3 (ip4, new_addr, oflags);
410 else if (ip4->protocol == IP_PROTOCOL_ICMP)
412 icmp46_header_t *icmp = (icmp46_header_t *) udp;
413 if (icmp_type_is_error_message (icmp->type))
415 /* SNAT only if src_addr was translated */
/* NOTE(review): the declaration receiving this comparison (snat_outer_ip)
 * is not visible in this listing. */
417 (ip4->src_address.as_u32 ==
418 session->key.cs_ip[VLIB_RX].ip4.as_u32);
419 cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
420 snat_outer_ip, oflags);
422 else if (icmp_type_is_echo (icmp->type))
423 cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port, oflags);
/* Rewrite the addresses of an IPv6 header: new_addr[VLIB_TX] becomes the
 * destination and new_addr[VLIB_RX] the source. No checksum is touched
 * here. */
427 static_always_inline void
428 cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
430 ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
431 ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
/* Compute the checksum of an IPv6 pseudo header built from the supplied
 * addresses instead of the ones currently in the packet.
 * address[VLIB_RX] is used as source and address[VLIB_TX] as destination.
 * Returns the bitwise complement of clib_ip_csum over the pseudo header. */
434 static_always_inline u16
435 ip6_pseudo_header_cksum2 (ip6_header_t *ip6, ip6_address_t address[VLIB_N_DIR])
437 ip6_psh_t psh = { 0 };
438 psh.src = address[VLIB_RX];
439 psh.dst = address[VLIB_TX];
/* payload_length is copied as stored in the header, with no byte swap. */
440 psh.l4len = ip6->payload_length;
/* The protocol is widened to 32 bits and converted to network order. */
441 psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
442 return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
/* IPv6 counterpart of cnat_ip4_translate_l4: rewrite the L4 ports and update
 * *sum accordingly.
 * Ports are always overwritten with new_port[]. Two checksum paths are
 * visible: one sets *sum to the pseudo-header checksum computed with the new
 * addresses (tied to the TCP/UDP checksum-offload bits), the other updates
 * *sum incrementally by adding the new and subtracting the old 64-bit address
 * halves and then updating both ports.
 * The IPv6 addresses themselves are not modified here (ip6 is only read).
 * NOTE(review): the `if (oflags & ...)` line and the early return that
 * separate the two paths are not visible in this listing - confirm. */
445 static_always_inline void
446 cnat_ip6_translate_l4 (ip6_header_t *ip6, udp_header_t *udp, ip_csum_t *sum,
447 ip6_address_t new_addr[VLIB_N_DIR],
448 u16 new_port[VLIB_N_DIR], u32 oflags)
450 u16 old_port[VLIB_N_DIR];
451 old_port[VLIB_TX] = udp->dst_port;
452 old_port[VLIB_RX] = udp->src_port;
454 udp->dst_port = new_port[VLIB_TX];
455 udp->src_port = new_port[VLIB_RX];
458 (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
460 *sum = ip6_pseudo_header_cksum2 (ip6, new_addr);
/* Destination address: add new, subtract old (still present in ip6). */
464 *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
465 *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
466 *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[0]);
467 *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[1]);
/* Source address: same treatment. */
469 *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
470 *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
471 *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[0]);
472 *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[1]);
474 *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
475 udp_header_t, dst_port);
477 *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
478 udp_header_t, src_port);
/* Translate an ICMPv6 echo request/reply.
 * Rewrites the IPv6 addresses, replaces the echo identifier with
 * new_port[VLIB_RX] and updates the ICMPv6 checksum. Unlike the IPv4 variant,
 * the address change is folded into the ICMP checksum here.
 * NOTE(review): the declarations of `sum` and `old_port` are not visible in
 * this listing. */
481 static_always_inline void
482 cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
483 ip6_address_t new_addr[VLIB_N_DIR],
484 u16 new_port[VLIB_N_DIR])
486 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
487 ip6_address_t old_addr[VLIB_N_DIR];
/* Snapshot identifier and addresses before rewriting them. */
490 old_port = echo->identifier;
491 ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
492 ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
494 sum = icmp->checksum;
496 cnat_ip6_translate_l3 (ip6, new_addr);
498 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
499 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
500 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
501 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
503 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
504 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
505 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
506 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
508 echo->identifier = new_port[VLIB_RX];
/* NOTE(review): as listed, the result of ip_csum_update is not assigned; the
 * preceding source line is missing from this listing and presumably holds
 * `sum =` - confirm. */
510 ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
512 icmp->checksum = ip_csum_fold (sum);
/* Translate an ICMPv6 error message together with the packet embedded in it.
 * The embedded IPv6 header is located at (icmp + 2), followed by its TCP/UDP
 * header. The embedded packet's new addresses and ports are the outer ones
 * with RX and TX swapped.
 * Steps visible here:
 *  1. save inner addresses/ports and outer addresses;
 *  2. rewrite the outer addresses and fold that change into the ICMP
 *     checksum;
 *  3. translate the inner TCP/UDP header and store its folded checksum;
 *  4. fold the inner L4 checksum, port and address changes into the ICMP
 *     checksum.
 * outer_new_addr[VLIB_RX] may be overwritten by this function.
 * NOTE(review): the remaining parameter lines, the action taken when the type
 * is not an error message, the condition guarding the assignment to
 * outer_new_addr[VLIB_RX] and the branch for other inner protocols are not
 * visible in this listing. */
515 static_always_inline void
516 cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
517 icmp46_header_t * icmp,
518 ip6_address_t outer_new_addr[VLIB_N_DIR],
519 u16 outer_new_port[VLIB_N_DIR],
522 ip6_address_t new_addr[VLIB_N_DIR];
523 ip6_address_t old_addr[VLIB_N_DIR];
524 ip6_address_t outer_old_addr[VLIB_N_DIR];
525 u16 new_port[VLIB_N_DIR];
526 u16 old_port[VLIB_N_DIR];
527 ip_csum_t sum, inner_l4_sum, inner_l4_old_sum;
529 if (!icmp6_type_is_error_message (icmp->type))
532 ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2);
533 udp_header_t *udp = (udp_header_t *) (ip6 + 1);
534 tcp_header_t *tcp = (tcp_header_t *) udp;
536 /* Swap inner ports */
537 ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]);
538 ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]);
539 new_port[VLIB_TX] = outer_new_port[VLIB_RX];
540 new_port[VLIB_RX] = outer_new_port[VLIB_TX];
/* Snapshot the inner values before they are rewritten. */
542 ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
543 ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
544 old_port[VLIB_RX] = udp->src_port;
545 old_port[VLIB_TX] = udp->dst_port;
547 sum = icmp->checksum;
548 /* Translate outer ip */
549 ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address);
550 ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address);
/* Copying the current source back makes the outer source rewrite a no-op. */
552 ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
553 cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
/* Fold the outer address change into the ICMP checksum. */
555 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
556 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
557 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
558 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
560 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
561 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
562 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
563 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
565 /* Translate inner TCP / UDP */
566 if (ip6->protocol == IP_PROTOCOL_TCP)
568 inner_l4_old_sum = inner_l4_sum = tcp->checksum;
569 cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
571 tcp->checksum = ip_csum_fold (inner_l4_sum);
573 else if (ip6->protocol == IP_PROTOCOL_UDP)
575 inner_l4_old_sum = inner_l4_sum = udp->checksum;
576 cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
578 udp->checksum = ip_csum_fold (inner_l4_sum);
583 /* UDP/TCP checksum changed */
/* ip4_header_t is named as the type argument here even though this is the
 * IPv6 path. */
584 sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum, ip4_header_t,
587 /* UDP/TCP Ports changed */
588 sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
589 udp_header_t, dst_port);
591 sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
592 udp_header_t, src_port);
/* Rewrite the inner addresses last; the L4 translation above read the old
 * addresses from ip6. */
594 cnat_ip6_translate_l3 (ip6, new_addr);
595 /* IP src/dst addr changed */
596 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
597 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
598 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
599 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
601 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
602 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
603 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
604 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
606 icmp->checksum = ip_csum_fold (sum);
/* Apply a session's translation to an IPv6 packet.
 * The new addresses and ports are taken from session->value and the rewrite
 * is dispatched on ip6->protocol: TCP, UDP, or ICMPv6 (error messages and
 * echo). Unlike the IPv4 variant, no SCTP branch is visible here.
 * For TCP the session lifetime is also refreshed from the TCP flags. */
609 static_always_inline void
610 cnat_translation_ip6 (const cnat_session_t *session, ip6_header_t *ip6,
611 udp_header_t *udp, u32 oflags)
613 tcp_header_t *tcp = (tcp_header_t *) udp;
614 ip6_address_t new_addr[VLIB_N_DIR];
615 u16 new_port[VLIB_N_DIR];
617 ip6_address_copy (&new_addr[VLIB_TX], &session->value.cs_ip[VLIB_TX].ip6);
618 ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6);
619 new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
620 new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
622 if (ip6->protocol == IP_PROTOCOL_TCP)
624 ip_csum_t sum = tcp->checksum;
/* L4 before L3: the L4 helper reads the old addresses from ip6. */
625 cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
626 tcp->checksum = ip_csum_fold (sum);
627 cnat_ip6_translate_l3 (ip6, new_addr);
628 cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
630 else if (ip6->protocol == IP_PROTOCOL_UDP)
632 ip_csum_t sum = udp->checksum;
633 cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
634 udp->checksum = ip_csum_fold (sum);
635 cnat_ip6_translate_l3 (ip6, new_addr);
637 else if (ip6->protocol == IP_PROTOCOL_ICMP6)
639 icmp46_header_t *icmp = (icmp46_header_t *) udp;
640 if (icmp6_type_is_error_message (icmp->type))
642 /* SNAT only if src_addr was translated */
/* NOTE(review): the second argument of this comparison and the trailing
 * arguments of the call below are not visible in this listing. */
643 u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
646 cnat_translation_icmp6_error (ip6, icmp, new_addr, new_port,
649 else if (icmp6_type_is_echo (icmp->type))
650 cnat_translation_icmp6_echo (ip6, icmp, new_addr, new_port);
/* Build the session lookup key for buffer b into *bkey (viewed as a
 * cnat_session_t).
 * Always sets the address family, location and zeroes the pad field, then
 * fills addresses, protocol and ports from the packet:
 *  - ICMP error: the embedded packet at (icmp + 2) is used, with RX/TX
 *    swapped;
 *  - ICMP echo: the echo identifier is used for both ports;
 *  - UDP/TCP (and SCTP for IPv4): ports come from the L4 header.
 * A final fallback sets cs_proto to 0 so that the lookup finds nothing.
 * For CNAT_LOCATION_OUTPUT the IP header is located
 * vnet_buffer(b)->ip.save_rewrite_length bytes past the buffer's current
 * data.
 * NOTE(review): the local declarations, the address-family test selecting
 * the IPv4/IPv6 halves, and the second argument of every
 * ip46_address_set_ip4/ip6 call are not visible in this listing. */
654 static_always_inline void
655 cnat_session_make_key (vlib_buffer_t *b, ip_address_family_t af,
656 cnat_session_location_t cs_loc, cnat_bihash_kv_t *bkey)
/* The key is written in place through a session-typed view of bkey. */
659 cnat_session_t *session = (cnat_session_t *) bkey;
661 session->key.cs_af = af;
663 session->key.cs_loc = cs_loc;
664 session->key.__cs_pad = 0;
665 if (cs_loc == CNAT_LOCATION_OUTPUT)
667 iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
672 ip4 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
674 if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
676 icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
677 if (icmp_type_is_error_message (icmp->type))
679 ip4 = (ip4_header_t *) (icmp + 2); /* Use inner packet */
680 udp = (udp_header_t *) (ip4 + 1);
681 /* Swap dst & src for search as ICMP payload is reversed */
682 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
684 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
686 session->key.cs_proto = ip4->protocol;
687 session->key.cs_port[VLIB_TX] = udp->src_port;
688 session->key.cs_port[VLIB_RX] = udp->dst_port;
690 else if (icmp_type_is_echo (icmp->type))
692 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
693 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
695 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
697 session->key.cs_proto = ip4->protocol;
/* Echo traffic has no ports; the identifier stands in for both. */
698 session->key.cs_port[VLIB_TX] = echo->identifier;
699 session->key.cs_port[VLIB_RX] = echo->identifier;
704 else if (ip4->protocol == IP_PROTOCOL_UDP ||
705 ip4->protocol == IP_PROTOCOL_TCP)
707 udp = (udp_header_t *) (ip4 + 1);
708 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
710 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
712 session->key.cs_proto = ip4->protocol;
713 session->key.cs_port[VLIB_RX] = udp->src_port;
714 session->key.cs_port[VLIB_TX] = udp->dst_port;
716 else if (ip4->protocol == IP_PROTOCOL_SCTP)
719 sctp = (sctp_header_t *) (ip4 + 1);
720 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
722 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
724 session->key.cs_proto = ip4->protocol;
725 session->key.cs_port[VLIB_RX] = sctp->src_port;
726 session->key.cs_port[VLIB_TX] = sctp->dst_port;
/* IPv6 half: same structure as above, without an SCTP branch. */
734 ip6 = (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
735 if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
737 icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
738 if (icmp6_type_is_error_message (icmp->type))
740 ip6 = (ip6_header_t *) (icmp + 2); /* Use inner packet */
741 udp = (udp_header_t *) (ip6 + 1);
742 /* Swap dst & src for search as ICMP payload is reversed */
743 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
745 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
747 session->key.cs_proto = ip6->protocol;
748 session->key.cs_port[VLIB_TX] = udp->src_port;
749 session->key.cs_port[VLIB_RX] = udp->dst_port;
751 else if (icmp6_type_is_echo (icmp->type))
753 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
754 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
756 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
758 session->key.cs_proto = ip6->protocol;
759 session->key.cs_port[VLIB_TX] = echo->identifier;
760 session->key.cs_port[VLIB_RX] = echo->identifier;
765 else if (ip6->protocol == IP_PROTOCOL_UDP ||
766 ip6->protocol == IP_PROTOCOL_TCP)
768 udp = (udp_header_t *) (ip6 + 1);
769 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
771 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
773 session->key.cs_port[VLIB_RX] = udp->src_port;
774 session->key.cs_port[VLIB_TX] = udp->dst_port;
775 session->key.cs_proto = ip6->protocol;
783 /* Ensure we dont find anything */
784 session->key.cs_proto = 0;
/* Pick a backend path for a packet that matched translation ct.
 * Computes the packet's flow hash (IPv4 or IPv6 depending on af) using the
 * load-balance object's hash config, selects a bucket, writes the chosen
 * forwarding bucket's dpoi_index to *dpoi_index, and returns the matching
 * entry of ct->ct_active_paths.
 * Only the header pointer matching af is read (ip4 for AF_IP4, else ip6).
 * NOTE(review): the statement executed when the load-balance has no buckets
 * is not visible in this listing (presumably an early return) - confirm. */
788 static_always_inline cnat_ep_trk_t *
789 cnat_load_balance (const cnat_translation_t *ct, ip_address_family_t af,
790 ip4_header_t *ip4, ip6_header_t *ip6, u32 *dpoi_index)
792 cnat_main_t *cm = &cnat_main;
793 const load_balance_t *lb0;
794 const dpo_id_t *dpo0;
795 u32 hash_c0, bucket0;
797 lb0 = load_balance_get (ct->ct_lb.dpoi_index);
/* Guard: the modulo by lb_n_buckets below requires a non-zero count. */
798 if (PREDICT_FALSE (!lb0->lb_n_buckets))
801 /* session table miss */
802 hash_c0 = (AF_IP4 == af ? ip4_compute_flow_hash (ip4, lb0->lb_hash_config) :
803 ip6_compute_flow_hash (ip6, lb0->lb_hash_config));
/* Maglev: the bucket comes from the translation's lookup table indexed by
 * hash modulo cm->maglev_len; otherwise plain hash modulo bucket count. */
805 if (PREDICT_FALSE (ct->lb_type == CNAT_LB_MAGLEV))
806 bucket0 = ct->lb_maglev[hash_c0 % cm->maglev_len];
808 bucket0 = hash_c0 % lb0->lb_n_buckets;
810 dpo0 = load_balance_get_fwd_bucket (lb0, bucket0);
812 *dpoi_index = dpo0->dpoi_index;
814 return &ct->ct_active_paths[bucket0];
818 * Create NAT sessions
819 * rsession_location is the location the (return) session will be
/* Create the state for a new NAT session: the return session first, then the
 * forward session, then client bookkeeping.
 * The return session is derived from the forward one by swapping RX/TX: its
 * key is the forward session's (translated) value and its value is the
 * forward session's key. Both share the same timestamp index.
 * The return entry is added with "add but don't overwrite"; a retry path
 * picks a random source port and tries again, up to 100 times, before
 * logging a warning.
 * Unless CNAT_SESSION_FLAG_NO_CLIENT is set, the client for the forward
 * session's translated source address is looked up; the throttle hash is
 * updated under a spinlock and cnat_client_learn is sent to the main thread,
 * or the client's session count is bumped.
 * NOTE(review): the `rsession_flags` parameter line, the retry label, the
 * `if (rv)` test, the branch taken without CNAT_SESSION_RETRY_SNAT, the
 * client NULL check and several declarations (cc, addr, p, refcnt) are not
 * visible in this listing - confirm the control flow against the full
 * file. */
822 static_always_inline void
823 cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
824 cnat_session_location_t rsession_location,
828 cnat_bihash_kv_t rkey;
829 cnat_session_t *rsession = (cnat_session_t *) & rkey;
830 cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
831 int rv, n_retries = 0;
/* Function-local static seed for random_u32; no synchronization is visible
 * around it. */
832 static u32 sport_seed = 0;
834 session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
836 /* First create the return session */
837 ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
838 &session->value.cs_ip[VLIB_TX]);
839 ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
840 &session->value.cs_ip[VLIB_RX]);
841 rsession->key.cs_proto = session->key.cs_proto;
842 rsession->key.cs_loc = rsession_location;
843 rsession->key.__cs_pad = 0;
844 rsession->key.cs_af = ctx->af;
845 rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
846 rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
/* The return value restores the original (pre-translation) tuple. */
848 ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
849 &session->key.cs_ip[VLIB_TX]);
850 ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
851 &session->key.cs_ip[VLIB_RX]);
852 rsession->value.cs_ts_index = session->value.cs_ts_index;
853 rsession->value.cs_lbi = INDEX_INVALID;
854 rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
855 rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
856 rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
859 rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
860 2 /* add but don't overwrite */);
863 if (!(rsession_flags & CNAT_SESSION_RETRY_SNAT))
866 /* return session add failed pick an new random src port */
/* The 32-bit random value is stored into the port fields, so only its low
 * bits are kept if those fields are 16-bit (as the u16 port arrays used
 * elsewhere in this file suggest). */
867 rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
868 random_u32 (&sport_seed);
869 if (n_retries++ < 100)
870 goto retry_add_ression;
873 clib_warning ("Could not find a free port after 100 tries");
874 /* translate this packet, but don't create state */
/* Forward session: plain add. */
879 cnat_bihash_add_del (&cnat_session_db, bkey, 1 /* add */);
881 if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
883 /* is this the first time we've seen this source address */
884 cc = (AF_IP4 == ctx->af ?
885 cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
886 cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
894 addr.version = ctx->af;
895 ip46_address_copy (&addr.ip, &session->value.cs_ip[VLIB_RX]);
/* The throttle hash is only touched while holding throttle_lock. */
898 clib_spinlock_lock (&cnat_client_db.throttle_lock);
900 p = hash_get_mem (cnat_client_db.throttle_mem, &addr);
904 hash_set_mem (cnat_client_db.throttle_mem, &addr, refcnt);
907 hash_set_mem_alloc (&cnat_client_db.throttle_mem, &addr, 0);
909 clib_spinlock_unlock (&cnat_client_db.throttle_lock);
911 /* fire client create to the main thread */
913 vl_api_rpc_call_main_thread (cnat_client_learn, (u8 *) &addr,
918 /* Refcount reverse session */
919 cnat_client_cnt_session (cc);
/* Common frame-processing body shared by the CNAT graph nodes.
 * For every buffer in the frame it builds the session key, searches
 * cnat_session_db, and calls cnat_sub with the search result; cnat_sub's
 * return value becomes the buffer's next index. Finally all buffers are
 * enqueued to their next nodes and the number of vectors is returned.
 * The visible code has a 4-wide section that pipelines the work (keys and
 * hashes for the following four buffers are prepared, and their hash buckets
 * prefetched, after the current four are processed) and a single-buffer
 * section using the non-hashed search.
 * NOTE(review): the return type, the trailing parameter (do_trace is used
 * below), the loop headers/conditions, the pointer advances, and the
 * declarations of now/hash/rv/next initialisation are not visible in this
 * listing - confirm the exact loop bounds against the full file. */
926 cnat_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
927 vlib_frame_t *frame, cnat_node_sub_t cnat_sub,
928 ip_address_family_t af, cnat_session_location_t cs_loc,
931 u32 n_left, *from, thread_index;
932 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
933 vlib_buffer_t **b = bufs;
934 u16 nexts[VLIB_FRAME_SIZE], *next;
937 thread_index = vm->thread_index;
938 from = vlib_frame_vector_args (frame);
939 n_left = frame->n_vectors;
/* Resolve all buffer indices to pointers up front. */
941 vlib_get_buffers (vm, from, bufs, n_left);
942 now = vlib_time_now (vm);
943 cnat_session_t *session[4];
944 cnat_bihash_kv_t bkey[4], bvalue[4];
/* Context handed to cnat_sub for every packet of this frame. */
948 cnat_node_ctx_t ctx = { now, thread_index, af, do_trace };
952 /* Kickstart our state */
953 cnat_session_make_key (b[3], af, cs_loc, &bkey[3]);
954 cnat_session_make_key (b[2], af, cs_loc, &bkey[2]);
955 cnat_session_make_key (b[1], af, cs_loc, &bkey[1]);
956 cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
958 hash[3] = cnat_bihash_hash (&bkey[3]);
959 hash[2] = cnat_bihash_hash (&bkey[2]);
960 hash[1] = cnat_bihash_hash (&bkey[1]);
961 hash[0] = cnat_bihash_hash (&bkey[0]);
/* Prefetch buffer headers two groups ahead (b[8..11]). */
968 vlib_prefetch_buffer_header (b[11], LOAD);
969 vlib_prefetch_buffer_header (b[10], LOAD);
970 vlib_prefetch_buffer_header (b[9], LOAD);
971 vlib_prefetch_buffer_header (b[8], LOAD);
/* Search with the precomputed hash; when rv is non-zero the key itself is
 * handed to cnat_sub as the session, otherwise the search output. */
974 rv[3] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[3], &bkey[3],
976 session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
977 next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
979 rv[2] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[2], &bkey[2],
981 session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
982 next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
984 rv[1] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[1], &bkey[1],
986 session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
987 next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
989 rv[0] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[0], &bkey[0],
991 session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
992 next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
/* Prepare keys and hashes for the next group (b[4..7]), reusing the same
 * four key slots. */
994 cnat_session_make_key (b[7], af, cs_loc, &bkey[3]);
995 cnat_session_make_key (b[6], af, cs_loc, &bkey[2]);
996 cnat_session_make_key (b[5], af, cs_loc, &bkey[1]);
997 cnat_session_make_key (b[4], af, cs_loc, &bkey[0]);
999 hash[3] = cnat_bihash_hash (&bkey[3]);
1000 hash[2] = cnat_bihash_hash (&bkey[2]);
1001 hash[1] = cnat_bihash_hash (&bkey[1]);
1002 hash[0] = cnat_bihash_hash (&bkey[0]);
1004 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[3]);
1005 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[2]);
1006 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[1]);
1007 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[0]);
1009 cnat_bihash_prefetch_data (&cnat_session_db, hash[3]);
1010 cnat_bihash_prefetch_data (&cnat_session_db, hash[2]);
1011 cnat_bihash_prefetch_data (&cnat_session_db, hash[1]);
1012 cnat_bihash_prefetch_data (&cnat_session_db, hash[0]);
/* Single-buffer section: key is built and searched without a precomputed
 * hash. */
1021 cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
1022 rv[0] = cnat_bihash_search_i2 (&cnat_session_db, &bkey[0], &bvalue[0]);
1024 session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
1025 next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
1032 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1034 return frame->n_vectors;
1038 * fd.io coding-style-patch-verification: ON
1041 * eval: (c-set-style "gnu")