2 * Copyright (c) 2020 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #ifndef __CNAT_NODE_H__
17 #define __CNAT_NODE_H__
19 #include <vlibmemory/api.h>
20 #include <vnet/dpo/load_balance.h>
21 #include <vnet/dpo/load_balance_map.h>
23 #include <cnat/cnat_session.h>
24 #include <cnat/cnat_client.h>
25 #include <cnat/cnat_inline.h>
26 #include <cnat/cnat_translation.h>
28 #include <vnet/ip/ip4_inlines.h>
29 #include <vnet/ip/ip6_inlines.h>
/**
 * Per-packet callback type invoked by cnat_node_inline () for every buffer.
 * rv is the result of the session table search; session points at the
 * lookup key when rv is non-zero and at the value returned by the search
 * when rv is zero. The returned uword is stored as the next index of the
 * buffer.
 * NOTE(review): one parameter line of this prototype is not visible in this
 * view; callers pass the buffer between node and ctx.
 */
31 typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
32 vlib_node_runtime_t * node,
34 cnat_node_ctx_t * ctx, int rv,
35 cnat_session_t * session);
/**
 * Trace record written by cnat_add_trace () and printed by
 * format_cnat_trace ().
 * NOTE(review): format_cnat_trace () also reads a flags member whose
 * declaration is not visible in this view.
 */
37 typedef struct cnat_trace_element_t_
/* Copy of the session; only filled when a session was found or created */
39 cnat_session_t session;
/* Copy of the translation; only filled when TRANSLATION_FOUND is set */
40 cnat_translation_t tr;
/* RX and TX interface indices copied from the buffer metadata */
41 u32 sw_if_index[VLIB_N_RX_TX];
/* NOTE(review): neither written nor read by the helpers visible here - confirm usage */
42 u32 snat_policy_result;
44 } cnat_trace_element_t;
/**
 * Bit flags describing what happened to a traced packet. They select which
 * parts of the trace element are copied in cnat_add_trace () and which text
 * format_cnat_trace () prints.
 */
46 typedef enum cnat_trace_element_flag_t_
/* An existing session matched the packet; the session copy is valid */
48 CNAT_TRACE_SESSION_FOUND = (1 << 0),
/* A session was created for the packet; the session copy is valid */
49 CNAT_TRACE_SESSION_CREATED = (1 << 1),
/* A translation is attached; the translation copy is valid */
50 CNAT_TRACE_TRANSLATION_FOUND = (1 << 2),
/* Printed as the policy:skip marker by format_cnat_trace () */
51 CNAT_TRACE_NO_NAT = (1 << 3),
52 } cnat_trace_element_flag_t;
/**
 * Record a trace element for buffer b.
 * The RX/TX interface indices are always stored. The session is copied only
 * when flags carries SESSION_FOUND or SESSION_CREATED, and the translation
 * is copied only when flags carries TRANSLATION_FOUND.
 * NOTE(review): the flags parameter declaration is not visible in this view.
 */
54 static_always_inline void
55 cnat_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
56 cnat_session_t *session, const cnat_translation_t *ct,
59 cnat_trace_element_t *t;
/* NOTE(review): the condition guarding this line is not visible in this view; presumably it tests ct - confirm */
61 flags |= CNAT_TRACE_TRANSLATION_FOUND;
/* Obtain the trace element to fill for this buffer */
63 t = vlib_add_trace (vm, node, b, sizeof (*t));
64 t->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
65 t->sw_if_index[VLIB_TX] = vnet_buffer (b)->sw_if_index[VLIB_TX];
/* The session and translation pointers are only dereferenced under their flags */
67 if (flags & (CNAT_TRACE_SESSION_FOUND | CNAT_TRACE_SESSION_CREATED))
68 clib_memcpy (&t->session, session, sizeof (t->session));
69 if (flags & CNAT_TRACE_TRANSLATION_FOUND)
70 clib_memcpy (&t->tr, ct, sizeof (cnat_translation_t));
/**
 * Format callback rendering a cnat_trace_element_t.
 * Arguments pulled from the va_list, in order: vlib_main_t pointer (unused),
 * vlib_node_t pointer (unused), cnat_trace_element_t pointer.
 * Output: a session status text, an optional policy marker, the in/out
 * interface names, then the session and the translation when the matching
 * flags are set.
 * NOTE(review): the return type line and the final return are not visible in
 * this view.
 */
75 format_cnat_trace (u8 *s, va_list *args)
77 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
78 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
79 cnat_trace_element_t *t = va_arg (*args, cnat_trace_element_t *);
/* Remember the current indent so that continuation lines stay aligned */
80 u32 indent = format_get_indent (s);
81 vnet_main_t *vnm = vnet_get_main ();
/* Session status: SESSION_CREATED is tested before SESSION_FOUND */
83 if (t->flags & CNAT_TRACE_SESSION_CREATED)
84 s = format (s, "created session");
85 else if (t->flags & CNAT_TRACE_SESSION_FOUND)
86 s = format (s, "found session");
/* NOTE(review): the keyword introducing this line is not visible; presumably the final else of the chain */
88 s = format (s, "session not found");
90 if (t->flags & (CNAT_TRACE_NO_NAT))
91 s = format (s, " [policy:skip]");
/* Interface names are resolved from the recorded RX/TX sw_if_index values */
93 s = format (s, "\n%Uin:%U out:%U ", format_white_space, indent,
94 format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_RX],
95 format_vnet_sw_if_index_name, vnm, t->sw_if_index[VLIB_TX]);
/* Same flag test as the one guarding the session copy in cnat_add_trace () */
97 if (t->flags & (CNAT_TRACE_SESSION_CREATED | CNAT_TRACE_SESSION_FOUND))
98 s = format (s, "\n%U%U", format_white_space, indent, format_cnat_session,
101 if (t->flags & CNAT_TRACE_TRANSLATION_FOUND)
102 s = format (s, "\n%Utranslation: %U", format_white_space, indent,
103 format_cnat_translation, &t->tr, 0);
/**
 * Tell whether an ICMPv4 type is handled as an error message.
 * Callers in this file take the ICMP error translation / inner-packet keying
 * path when the result is non-zero.
 * NOTE(review): the switch header, possibly further case labels and the
 * return statements are not visible in this view; presumably the listed
 * types return 1 and everything else 0 - confirm.
 */
108 static_always_inline u8
109 icmp_type_is_error_message (u8 icmp_type)
113 case ICMP4_destination_unreachable:
114 case ICMP4_time_exceeded:
115 case ICMP4_parameter_problem:
116 case ICMP4_source_quench:
118 case ICMP4_alternate_host_address:
/**
 * Tell whether an ICMPv4 type is an echo request or an echo reply.
 * Callers in this file use a non-zero result to key and translate the
 * packet on its echo identifier.
 * NOTE(review): the return statements are not visible in this view.
 */
124 static_always_inline u8
125 icmp_type_is_echo (u8 icmp_type)
129 case ICMP4_echo_request:
130 case ICMP4_echo_reply:
/**
 * Tell whether an ICMPv6 type is an echo request or an echo reply.
 * IPv6 counterpart of icmp_type_is_echo ().
 * NOTE(review): the return statements are not visible in this view.
 */
136 static_always_inline u8
137 icmp6_type_is_echo (u8 icmp_type)
141 case ICMP6_echo_request:
142 case ICMP6_echo_reply:
/**
 * Tell whether an ICMPv6 type is handled as an error message.
 * IPv6 counterpart of icmp_type_is_error_message ().
 * NOTE(review): possible further case labels and the return statements are
 * not visible in this view.
 */
148 static_always_inline u8
149 icmp6_type_is_error_message (u8 icmp_type)
153 case ICMP6_destination_unreachable:
154 case ICMP6_time_exceeded:
155 case ICMP6_parameter_problem:
/**
 * Compare two IPv6 addresses for equality.
 * Despite the cmp prefix this is not an ordering: the result is non-zero
 * when both 64-bit halves are identical and zero otherwise.
 */
161 static_always_inline u8
162 cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
164 return ((a1->as_u64[0] == a2->as_u64[0])
165 && (a1->as_u64[1] == a2->as_u64[1]));
169 * Inline translation functions
/**
 * Return non-zero when the IPv6 address has at least one bit set, i.e. it is
 * not the all-zero address. The translation helpers in this file use the
 * all-zero address to mean that the field must be left unchanged.
 */
172 static_always_inline u8
173 has_ip6_address (ip6_address_t * a)
175 return ((0 != a->as_u64[0]) || (0 != a->as_u64[1]));
/**
 * Rewrite the L4 ports of a UDP/TCP header and fold the address and port
 * changes into the caller-held L4 checksum accumulator (*sum).
 * VLIB_TX entries apply to the destination, VLIB_RX entries to the source.
 * On the checksum path a zero new_addr / new_port entry leaves the matching
 * field and its checksum contribution untouched. In the lines visible here
 * the IPv4 addresses are only read, never written.
 * NOTE(review): the sum parameter declaration is not visible in this view.
 */
178 static_always_inline void
179 cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
181 ip4_address_t new_addr[VLIB_N_DIR],
182 u16 new_port[VLIB_N_DIR])
184 u16 old_port[VLIB_N_DIR];
185 ip4_address_t old_addr[VLIB_N_DIR];
/* NOTE(review): on this path the ports are overwritten without the zero test used on the checksum path below; the end of this branch (presumably a return) is not visible */
187 /* Fastpath no checksum */
188 if (PREDICT_TRUE (0 == *sum))
190 udp->dst_port = new_port[VLIB_TX];
191 udp->src_port = new_port[VLIB_RX];
/* Snapshot the current values; they are the old operands of the incremental updates */
195 old_port[VLIB_TX] = udp->dst_port;
196 old_port[VLIB_RX] = udp->src_port;
197 old_addr[VLIB_TX] = ip4->dst_address;
198 old_addr[VLIB_RX] = ip4->src_address;
/* Destination address change */
200 if (new_addr[VLIB_TX].as_u32)
203 ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
204 new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
/* Destination port change; a 16-bit ip4_header_t member is named only to describe the field to ip_csum_update (see the cheat comment) */
206 if (new_port[VLIB_TX])
208 udp->dst_port = new_port[VLIB_TX];
209 *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
210 ip4_header_t /* cheat */ ,
211 length /* changed member */ );
/* Source address change */
213 if (new_addr[VLIB_RX].as_u32)
216 ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
217 new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
/* Source port change */
219 if (new_port[VLIB_RX])
221 udp->src_port = new_port[VLIB_RX];
222 *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
223 ip4_header_t /* cheat */ ,
224 length /* changed member */ );
/**
 * Rewrite the ports of an SCTP header and recompute its checksum.
 * When the checksum field is zero the ports are written directly. Otherwise
 * a zero new_port entry leaves the matching port untouched and the checksum
 * is recomputed with clib_crc32c_with_init () over the bytes starting at the
 * SCTP header.
 * NOTE(review): the checksummed length is the IPv4 total length minus
 * sizeof (ip4_header_t), which assumes an IPv4 header without options.
 */
228 static_always_inline void
229 cnat_ip4_translate_sctp (ip4_header_t *ip4, sctp_header_t *sctp,
230 u16 new_port[VLIB_N_DIR])
/* NOTE(review): ports are overwritten here without the zero test used below; the end of this branch is not visible in this view */
232 /* Fastpath no checksum */
233 if (PREDICT_TRUE (0 == sctp->checksum))
235 sctp->dst_port = new_port[VLIB_TX];
236 sctp->src_port = new_port[VLIB_RX];
240 if (new_port[VLIB_TX])
241 sctp->dst_port = new_port[VLIB_TX];
242 if (new_port[VLIB_RX])
243 sctp->src_port = new_port[VLIB_RX];
/* Full recomputation: seeded with all ones, result complemented and stored little-endian */
246 sctp->checksum = clib_host_to_little_u32 (~clib_crc32c_with_init (
247 (u8 *) sctp, ntohs (ip4->length) - sizeof (ip4_header_t),
248 ~0 /* init value */));
/**
 * Rewrite the IPv4 destination (VLIB_TX) and source (VLIB_RX) addresses and
 * update the IPv4 header checksum incrementally.
 * A zero new_addr entry leaves the matching address untouched.
 * NOTE(review): the declaration and initial value of sum are not visible in
 * this view; presumably it starts from ip4->checksum - confirm.
 */
251 static_always_inline void
252 cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
254 ip4_address_t old_addr[VLIB_N_DIR];
/* Snapshot the addresses before they are overwritten */
257 old_addr[VLIB_TX] = ip4->dst_address;
258 old_addr[VLIB_RX] = ip4->src_address;
261 if (new_addr[VLIB_TX].as_u32)
263 ip4->dst_address = new_addr[VLIB_TX];
265 ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
266 new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
268 if (new_addr[VLIB_RX].as_u32)
270 ip4->src_address = new_addr[VLIB_RX];
272 ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
273 new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
/* Fold the accumulator back into the 16-bit header checksum */
275 ip4->checksum = ip_csum_fold (sum);
/**
 * Adjust the lifetime of the timestamp at index according to the TCP flags
 * of the packet.
 * FIN and RST both select CNAT_DEFAULT_TCP_RST_TIMEOUT; SYN together with
 * ACK selects the configured tcp_max_age.
 * NOTE(review): the statements closing each branch are not visible in this
 * view, so whether the tests are mutually exclusive cannot be told from here.
 */
278 static_always_inline void
279 cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
281 cnat_main_t *cm = &cnat_main;
/* FIN uses the same timeout constant as RST */
282 if (PREDICT_FALSE (tcp_fin (tcp)))
284 cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
287 if (PREDICT_FALSE (tcp_rst (tcp)))
289 cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
/* SYN+ACK: switch to the configured maximum age */
292 if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
294 cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
/**
 * Translate an ICMPv4 echo request/reply.
 * The IPv4 addresses are rewritten through cnat_ip4_translate_l3 (), the
 * echo identifier is replaced by new_port[VLIB_RX] and the ICMP checksum is
 * updated for the identifier change. new_port[VLIB_TX] is not used in the
 * lines visible here.
 * NOTE(review): the declarations of sum and old_port are not visible in this
 * view.
 */
298 static_always_inline void
299 cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
300 ip4_address_t new_addr[VLIB_N_DIR],
301 u16 new_port[VLIB_N_DIR])
/* The echo header is read right after the icmp46_header_t */
305 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
307 cnat_ip4_translate_l3 (ip4, new_addr);
308 old_port = echo->identifier;
309 echo->identifier = new_port[VLIB_RX];
/* Only the identifier change is folded into the ICMP checksum here */
311 sum = icmp->checksum;
312 sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
313 ip4_header_t /* cheat */ ,
314 length /* changed member */ );
316 icmp->checksum = ip_csum_fold (sum);
/**
 * Translate an ICMPv4 error message: the outer IPv4 header plus the embedded
 * (inner) IPv4 header and its TCP/UDP ports.
 * The inner packet is translated with the outer new addresses and ports
 * swapped between the RX and TX slots. Every change made to the inner packet
 * (L4 checksum, ports, IP checksum, addresses) is also folded into the ICMP
 * checksum accumulator.
 * NOTE(review): the last parameter of the signature is not visible in this
 * view.
 */
319 static_always_inline void
320 cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
321 icmp46_header_t * icmp,
322 ip4_address_t outer_new_addr[VLIB_N_DIR],
323 u16 outer_new_port[VLIB_N_DIR],
326 ip4_address_t new_addr[VLIB_N_DIR];
327 ip4_address_t old_addr[VLIB_N_DIR];
328 u16 new_port[VLIB_N_DIR];
329 u16 old_port[VLIB_N_DIR];
330 ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
/* The embedded IPv4 header is read two icmp46_header_t sizes past the ICMP header */
332 ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
333 udp_header_t *udp = (udp_header_t *) (ip4 + 1);
334 tcp_header_t *tcp = (tcp_header_t *) udp;
/* Addresses are swapped as well as ports */
336 /* Swap inner ports */
337 new_addr[VLIB_TX] = outer_new_addr[VLIB_RX];
338 new_addr[VLIB_RX] = outer_new_addr[VLIB_TX];
339 new_port[VLIB_TX] = outer_new_port[VLIB_RX];
340 new_port[VLIB_RX] = outer_new_port[VLIB_TX];
/* Snapshot the inner values before translation */
342 old_addr[VLIB_TX] = ip4->dst_address;
343 old_addr[VLIB_RX] = ip4->src_address;
344 old_port[VLIB_RX] = udp->src_port;
345 old_port[VLIB_TX] = udp->dst_port;
347 sum = icmp->checksum;
348 old_ip_sum = ip4->checksum;
/* NOTE(review): the guard of the next assignment is not visible; the assignment makes the L3 rewrite write back the current outer source, i.e. keeps it unchanged. It also modifies the array passed in by the caller */
350 /* translate outer ip. */
352 outer_new_addr[VLIB_RX] = outer_ip4->src_address;
353 cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
/* NOTE(review): inner_l4_sum and inner_l4_old_sum are only assigned in the TCP and UDP branches; the handling of other inner protocols is not visible in this view */
355 if (ip4->protocol == IP_PROTOCOL_TCP)
357 inner_l4_old_sum = inner_l4_sum = tcp->checksum;
358 cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
359 tcp->checksum = ip_csum_fold (inner_l4_sum);
361 else if (ip4->protocol == IP_PROTOCOL_UDP)
363 inner_l4_old_sum = inner_l4_sum = udp->checksum;
364 cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
365 udp->checksum = ip_csum_fold (inner_l4_sum);
370 /* UDP/TCP checksum changed */
371 sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
372 ip4_header_t, checksum);
374 /* UDP/TCP Ports changed */
375 if (old_port[VLIB_TX] && new_port[VLIB_TX])
376 sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
377 ip4_header_t /* cheat */ ,
378 length /* changed member */ );
380 if (old_port[VLIB_RX] && new_port[VLIB_RX])
381 sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
382 ip4_header_t /* cheat */ ,
383 length /* changed member */ );
/* Translate the inner IPv4 header, then account for its new header checksum */
386 cnat_ip4_translate_l3 (ip4, new_addr);
387 ip_csum_t new_ip_sum = ip4->checksum;
388 /* IP checksum changed */
389 sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
391 /* IP src/dst addr changed */
392 if (new_addr[VLIB_TX].as_u32)
394 ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
395 ip4_header_t, dst_address);
397 if (new_addr[VLIB_RX].as_u32)
399 ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
400 ip4_header_t, src_address);
402 icmp->checksum = ip_csum_fold (sum);
/**
 * Apply the translation stored in session->value to an IPv4 packet.
 * The new addresses and ports come from the session value (VLIB_TX for the
 * destination side, VLIB_RX for the source side). The work is dispatched on
 * the IP protocol: TCP, UDP, SCTP, and ICMP error or echo messages. For the
 * first three the L4 header is translated before the L3 header.
 * udp points at the L4 header whatever the protocol and is cast as needed.
 */
405 static_always_inline void
406 cnat_translation_ip4 (const cnat_session_t * session,
407 ip4_header_t * ip4, udp_header_t * udp)
409 tcp_header_t *tcp = (tcp_header_t *) udp;
410 ip4_address_t new_addr[VLIB_N_DIR];
411 u16 new_port[VLIB_N_DIR];
413 new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4;
414 new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4;
415 new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
416 new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
/* TCP additionally refreshes the session lifetime from the TCP flags */
418 if (ip4->protocol == IP_PROTOCOL_TCP)
420 ip_csum_t sum = tcp->checksum;
421 cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
422 tcp->checksum = ip_csum_fold (sum);
423 cnat_ip4_translate_l3 (ip4, new_addr);
424 cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
426 else if (ip4->protocol == IP_PROTOCOL_UDP)
428 ip_csum_t sum = udp->checksum;
429 cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
430 udp->checksum = ip_csum_fold (sum);
431 cnat_ip4_translate_l3 (ip4, new_addr);
/* The SCTP helper reads ip4->length, which the L3 rewrite does not touch */
433 else if (ip4->protocol == IP_PROTOCOL_SCTP)
435 sctp_header_t *sctp = (sctp_header_t *) udp;
436 cnat_ip4_translate_sctp (ip4, sctp, new_port);
437 cnat_ip4_translate_l3 (ip4, new_addr);
439 else if (ip4->protocol == IP_PROTOCOL_ICMP)
441 icmp46_header_t *icmp = (icmp46_header_t *) udp;
442 if (icmp_type_is_error_message (icmp->type))
/* NOTE(review): the variable receiving this comparison and the last argument of the call below are not visible in this view */
444 /* SNAT only if src_addr was translated */
446 (ip4->src_address.as_u32 ==
447 session->key.cs_ip[VLIB_RX].ip4.as_u32);
448 cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
451 else if (icmp_type_is_echo (icmp->type))
452 cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
/**
 * Rewrite the IPv6 destination (VLIB_TX) and source (VLIB_RX) addresses.
 * An all-zero new_addr entry leaves the matching address untouched. No
 * checksum is updated here; the callers in this file adjust the L4 / ICMP
 * checksums themselves.
 */
456 static_always_inline void
457 cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
459 if (has_ip6_address (&new_addr[VLIB_TX]))
460 ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
461 if (has_ip6_address (&new_addr[VLIB_RX]))
462 ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
/**
 * IPv6 counterpart of cnat_ip4_translate_l4 (): rewrite the UDP/TCP ports
 * and fold the address and port changes into the caller-held L4 checksum
 * accumulator (*sum).
 * On the checksum path an all-zero new address or a zero new port leaves the
 * matching field and its checksum contribution untouched. The IPv6 addresses
 * are only read here.
 * NOTE(review): the sum parameter declaration is not visible in this view.
 */
465 static_always_inline void
466 cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
468 ip6_address_t new_addr[VLIB_N_DIR],
469 u16 new_port[VLIB_N_DIR])
471 u16 old_port[VLIB_N_DIR];
472 ip6_address_t old_addr[VLIB_N_DIR];
/* NOTE(review): on this path the ports are overwritten without the zero test used on the checksum path below; the end of this branch (presumably a return) is not visible */
474 /* Fastpath no checksum */
475 if (PREDICT_TRUE (0 == *sum))
477 udp->dst_port = new_port[VLIB_TX];
478 udp->src_port = new_port[VLIB_RX];
/* Snapshot the current values used as old operands below */
482 old_port[VLIB_TX] = udp->dst_port;
483 old_port[VLIB_RX] = udp->src_port;
484 ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
485 ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
/* Destination address change: add both new 64-bit halves, subtract both old ones */
487 if (has_ip6_address (&new_addr[VLIB_TX]))
489 *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
490 *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
491 *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
492 *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
/* Destination port change */
495 if (new_port[VLIB_TX])
497 udp->dst_port = new_port[VLIB_TX];
498 *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
499 ip4_header_t /* cheat */ ,
500 length /* changed member */ );
/* Source address change */
502 if (has_ip6_address (&new_addr[VLIB_RX]))
504 *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
505 *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
506 *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
507 *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
/* Source port change */
510 if (new_port[VLIB_RX])
512 udp->src_port = new_port[VLIB_RX];
513 *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
514 ip4_header_t /* cheat */ ,
515 length /* changed member */ );
/**
 * Translate an ICMPv6 echo request/reply.
 * The IPv6 addresses are rewritten, the echo identifier is replaced by
 * new_port[VLIB_RX], and the ICMP checksum is updated for both the address
 * and the identifier changes (unlike the IPv4 variant, the address changes
 * are folded into the ICMP checksum here).
 * NOTE(review): the declarations of sum and old_port are not visible in this
 * view.
 */
519 static_always_inline void
520 cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
521 ip6_address_t new_addr[VLIB_N_DIR],
522 u16 new_port[VLIB_N_DIR])
524 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
525 ip6_address_t old_addr[VLIB_N_DIR];
/* Snapshot identifier and addresses before they are rewritten */
528 old_port = echo->identifier;
529 ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
530 ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
532 sum = icmp->checksum;
534 cnat_ip6_translate_l3 (ip6, new_addr);
/* Destination address change */
535 if (has_ip6_address (&new_addr[VLIB_TX]))
537 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
538 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
539 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
540 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
/* Source address change */
543 if (has_ip6_address (&new_addr[VLIB_RX]))
545 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
546 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
547 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
548 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
/* Identifier change */
551 echo->identifier = new_port[VLIB_RX];
552 sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
553 ip4_header_t /* cheat */ ,
554 length /* changed member */ );
556 icmp->checksum = ip_csum_fold (sum);
/**
 * Translate an ICMPv6 error message: the outer IPv6 header plus the embedded
 * (inner) IPv6 header and its TCP/UDP ports.
 * The inner packet is translated with the outer new addresses and ports
 * swapped between the RX and TX slots. Outer address changes and every
 * change made to the inner packet are folded into the ICMP checksum.
 * NOTE(review): the last parameter of the signature is not visible in this
 * view.
 */
559 static_always_inline void
560 cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
561 icmp46_header_t * icmp,
562 ip6_address_t outer_new_addr[VLIB_N_DIR],
563 u16 outer_new_port[VLIB_N_DIR],
566 ip6_address_t new_addr[VLIB_N_DIR];
567 ip6_address_t old_addr[VLIB_N_DIR];
568 ip6_address_t outer_old_addr[VLIB_N_DIR];
569 u16 new_port[VLIB_N_DIR];
570 u16 old_port[VLIB_N_DIR];
571 ip_csum_t sum, inner_l4_sum, inner_l4_old_sum;
/* NOTE(review): the statement executed for non-error types is not visible; presumably an early return */
573 if (!icmp6_type_is_error_message (icmp->type))
/* The embedded IPv6 header is read two icmp46_header_t sizes past the ICMP header */
576 ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2);
577 udp_header_t *udp = (udp_header_t *) (ip6 + 1);
578 tcp_header_t *tcp = (tcp_header_t *) udp;
/* Addresses are swapped as well as ports */
580 /* Swap inner ports */
581 ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]);
582 ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]);
583 new_port[VLIB_TX] = outer_new_port[VLIB_RX];
584 new_port[VLIB_RX] = outer_new_port[VLIB_TX];
/* Snapshot the inner values before translation */
586 ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
587 ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
588 old_port[VLIB_RX] = udp->src_port;
589 old_port[VLIB_TX] = udp->dst_port;
591 sum = icmp->checksum;
592 /* Translate outer ip */
593 ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address);
594 ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address);
/* NOTE(review): the guard of the next copy is not visible; the copy makes the L3 rewrite keep the current outer source and modifies the array passed in by the caller */
596 ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
597 cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
/* Outer destination address change */
598 if (has_ip6_address (&outer_new_addr[VLIB_TX]))
600 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
601 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
602 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
603 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
/* Outer source address change */
606 if (has_ip6_address (&outer_new_addr[VLIB_RX]))
608 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
609 sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
610 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
611 sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
/* NOTE(review): inner_l4_sum and inner_l4_old_sum are only assigned in the TCP and UDP branches; the handling of other inner protocols is not visible in this view */
614 /* Translate inner TCP / UDP */
615 if (ip6->protocol == IP_PROTOCOL_TCP)
617 inner_l4_old_sum = inner_l4_sum = tcp->checksum;
618 cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
619 tcp->checksum = ip_csum_fold (inner_l4_sum);
621 else if (ip6->protocol == IP_PROTOCOL_UDP)
623 inner_l4_old_sum = inner_l4_sum = udp->checksum;
624 cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
625 udp->checksum = ip_csum_fold (inner_l4_sum);
630 /* UDP/TCP checksum changed */
631 sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
632 ip4_header_t /* cheat */ ,
635 /* UDP/TCP Ports changed */
636 if (old_port[VLIB_TX] && new_port[VLIB_TX])
637 sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
638 ip4_header_t /* cheat */ ,
639 length /* changed member */ );
641 if (old_port[VLIB_RX] && new_port[VLIB_RX])
642 sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
643 ip4_header_t /* cheat */ ,
644 length /* changed member */ );
/* Translate the inner IPv6 header, then account for its address changes */
647 cnat_ip6_translate_l3 (ip6, new_addr);
648 /* IP src/dst addr changed */
649 if (has_ip6_address (&new_addr[VLIB_TX]))
651 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
652 sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
653 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
654 sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
657 if (has_ip6_address (&new_addr[VLIB_RX]))
659 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
660 sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
661 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
662 sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
665 icmp->checksum = ip_csum_fold (sum);
/**
 * Apply the translation stored in session->value to an IPv6 packet.
 * The new addresses and ports come from the session value (VLIB_TX for the
 * destination side, VLIB_RX for the source side). The work is dispatched on
 * the IP protocol: TCP, UDP, and ICMPv6 error or echo messages. No SCTP
 * branch is visible here, unlike in cnat_translation_ip4 ().
 * udp points at the L4 header whatever the protocol and is cast as needed.
 */
668 static_always_inline void
669 cnat_translation_ip6 (const cnat_session_t * session,
670 ip6_header_t * ip6, udp_header_t * udp)
672 tcp_header_t *tcp = (tcp_header_t *) udp;
673 ip6_address_t new_addr[VLIB_N_DIR];
674 u16 new_port[VLIB_N_DIR];
676 ip6_address_copy (&new_addr[VLIB_TX], &session->value.cs_ip[VLIB_TX].ip6);
677 ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6);
678 new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
679 new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
/* TCP additionally refreshes the session lifetime from the TCP flags */
681 if (ip6->protocol == IP_PROTOCOL_TCP)
683 ip_csum_t sum = tcp->checksum;
684 cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
685 tcp->checksum = ip_csum_fold (sum);
686 cnat_ip6_translate_l3 (ip6, new_addr);
687 cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
689 else if (ip6->protocol == IP_PROTOCOL_UDP)
691 ip_csum_t sum = udp->checksum;
692 cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
693 udp->checksum = ip_csum_fold (sum);
694 cnat_ip6_translate_l3 (ip6, new_addr);
696 else if (ip6->protocol == IP_PROTOCOL_ICMP6)
698 icmp46_header_t *icmp = (icmp46_header_t *) udp;
699 if (icmp6_type_is_error_message (icmp->type))
/* NOTE(review): the second operand of the comparison and the last argument of the call below are not visible in this view */
701 /* SNAT only if src_addr was translated */
702 u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
705 cnat_translation_icmp6_error (ip6, icmp, new_addr, new_port,
708 else if (icmp6_type_is_echo (icmp->type))
709 cnat_translation_icmp6_echo (ip6, icmp, new_addr, new_port);
/**
 * Fill bkey (viewed as a cnat_session_t) with the session lookup key of
 * buffer b: address family, location, padding, protocol, addresses, ports.
 * ICMP error messages are keyed on the embedded packet with the port slots
 * swapped, ICMP echo messages use the echo identifier for both ports, and
 * TCP/UDP (plus SCTP for IPv4) use the L4 ports.
 * NOTE(review): the second arguments of the ip46_address_set_* calls, the
 * local declarations and the branch selecting IPv4 vs IPv6 are not visible
 * in this view.
 */
713 static_always_inline void
714 cnat_session_make_key (vlib_buffer_t *b, ip_address_family_t af,
715 cnat_session_location_t cs_loc, cnat_bihash_kv_t *bkey)
/* The key/value pair storage is accessed through the session layout */
718 cnat_session_t *session = (cnat_session_t *) bkey;
720 session->key.cs_af = af;
722 session->key.cs_loc = cs_loc;
723 session->key.__cs_pad = 0;
/* For the output location the IP header offset is taken from the saved rewrite length */
724 if (cs_loc == CNAT_LOCATION_OUTPUT)
726 iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
/* IPv4 handling */
731 ip4 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
733 if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
735 icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
736 if (icmp_type_is_error_message (icmp->type))
738 ip4 = (ip4_header_t *) (icmp + 2); /* Use inner packet */
739 udp = (udp_header_t *) (ip4 + 1);
740 /* Swap dst & src for search as ICMP payload is reversed */
741 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
743 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
/* Protocol and ports come from the inner packet; TX takes the inner source port */
745 session->key.cs_proto = ip4->protocol;
746 session->key.cs_port[VLIB_TX] = udp->src_port;
747 session->key.cs_port[VLIB_RX] = udp->dst_port;
749 else if (icmp_type_is_echo (icmp->type))
751 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
752 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
754 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
/* The echo identifier stands in for both ports */
756 session->key.cs_proto = ip4->protocol;
757 session->key.cs_port[VLIB_TX] = echo->identifier;
758 session->key.cs_port[VLIB_RX] = echo->identifier;
763 else if (ip4->protocol == IP_PROTOCOL_UDP ||
764 ip4->protocol == IP_PROTOCOL_TCP)
/* The L4 header is read right after a fixed-size ip4_header_t */
766 udp = (udp_header_t *) (ip4 + 1);
767 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
769 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
771 session->key.cs_proto = ip4->protocol;
772 session->key.cs_port[VLIB_RX] = udp->src_port;
773 session->key.cs_port[VLIB_TX] = udp->dst_port;
775 else if (ip4->protocol == IP_PROTOCOL_SCTP)
778 sctp = (sctp_header_t *) (ip4 + 1);
779 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
781 ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
783 session->key.cs_proto = ip4->protocol;
784 session->key.cs_port[VLIB_RX] = sctp->src_port;
785 session->key.cs_port[VLIB_TX] = sctp->dst_port;
/* IPv6 handling mirrors the IPv4 one; no SCTP branch is visible for IPv6 */
793 ip6 = (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b) + iph_offset);
794 if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
796 icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
797 if (icmp6_type_is_error_message (icmp->type))
799 ip6 = (ip6_header_t *) (icmp + 2); /* Use inner packet */
800 udp = (udp_header_t *) (ip6 + 1);
801 /* Swap dst & src for search as ICMP payload is reversed */
802 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
804 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
806 session->key.cs_proto = ip6->protocol;
807 session->key.cs_port[VLIB_TX] = udp->src_port;
808 session->key.cs_port[VLIB_RX] = udp->dst_port;
810 else if (icmp6_type_is_echo (icmp->type))
812 cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
813 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
815 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
817 session->key.cs_proto = ip6->protocol;
818 session->key.cs_port[VLIB_TX] = echo->identifier;
819 session->key.cs_port[VLIB_RX] = echo->identifier;
824 else if (ip6->protocol == IP_PROTOCOL_UDP ||
825 ip6->protocol == IP_PROTOCOL_TCP)
827 udp = (udp_header_t *) (ip6 + 1);
828 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
830 ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
832 session->key.cs_port[VLIB_RX] = udp->src_port;
833 session->key.cs_port[VLIB_TX] = udp->dst_port;
834 session->key.cs_proto = ip6->protocol;
/* NOTE(review): the branch or label leading to this fallback is not visible in this view */
842 /* Ensure we do not find anything */
843 session->key.cs_proto = 0;
/**
 * Pick a backend of translation ct for the flow carried by the IPv4 or IPv6
 * header (selected by af).
 * The flow hash selects a bucket, either through the maglev table of the
 * translation (CNAT_LB_MAGLEV) or modulo the number of load-balance buckets.
 * The DPO index of that bucket is stored in *dpoi_index and the address of
 * the matching ct->ct_active_paths entry is returned.
 * NOTE(review): the statement executed when the load-balance has no bucket
 * is not visible in this view; presumably a NULL return - confirm.
 */
847 static_always_inline cnat_ep_trk_t *
848 cnat_load_balance (const cnat_translation_t *ct, ip_address_family_t af,
849 ip4_header_t *ip4, ip6_header_t *ip6, u32 *dpoi_index)
851 cnat_main_t *cm = &cnat_main;
852 const load_balance_t *lb0;
853 const dpo_id_t *dpo0;
854 u32 hash_c0, bucket0;
856 lb0 = load_balance_get (ct->ct_lb.dpoi_index);
857 if (PREDICT_FALSE (!lb0->lb_n_buckets))
/* Only the header matching af is dereferenced */
860 /* session table miss */
861 hash_c0 = (AF_IP4 == af ? ip4_compute_flow_hash (ip4, lb0->lb_hash_config) :
862 ip6_compute_flow_hash (ip6, lb0->lb_hash_config));
/* Maglev: the table entry at hash modulo maglev_len is the bucket */
864 if (PREDICT_FALSE (ct->lb_type == CNAT_LB_MAGLEV))
865 bucket0 = ct->lb_maglev[hash_c0 % cm->maglev_len];
/* NOTE(review): the keyword introducing this line is not visible; presumably the else of the test above */
867 bucket0 = hash_c0 % lb0->lb_n_buckets;
869 dpo0 = load_balance_get_fwd_bucket (lb0, bucket0);
871 *dpoi_index = dpo0->dpoi_index;
/* The same bucket number indexes the active paths of the translation */
873 return &ct->ct_active_paths[bucket0];
877 * Create NAT sessions
878 * rsession_location is the location the (return) session will be
/**
 * Create the session pair for a flow: first the return session, built by
 * mirroring the key and value of session, then the forward session itself.
 * Both sessions share the timestamp index allocated here. Unless
 * CNAT_SESSION_FLAG_NO_CLIENT is set in rsession_flags, the client matching
 * the translated source address is looked up; the throttle hash and an RPC
 * to the main thread (cnat_client_learn) are used on that path.
 * NOTE(review): the last parameter line (rsession_flags is used below), some
 * local declarations, the retry label and several branch lines are not
 * visible in this view.
 */
881 static_always_inline void
882 cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
883 cnat_session_location_t rsession_location,
887 cnat_bihash_kv_t rkey;
888 cnat_session_t *rsession = (cnat_session_t *) & rkey;
889 cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
890 int rv, n_retries = 0;
/* Persists across calls; no locking around its use is visible here */
891 static u32 sport_seed = 0;
893 session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
/* Return key = forward value with the RX and TX slots exchanged */
895 /* First create the return session */
896 ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
897 &session->value.cs_ip[VLIB_TX]);
898 ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
899 &session->value.cs_ip[VLIB_RX]);
900 rsession->key.cs_proto = session->key.cs_proto;
901 rsession->key.cs_loc = rsession_location;
902 rsession->key.__cs_pad = 0;
903 rsession->key.cs_af = ctx->af;
904 rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
905 rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
/* Return value = forward key with the RX and TX slots exchanged */
907 ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
908 &session->key.cs_ip[VLIB_TX]);
909 ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
910 &session->key.cs_ip[VLIB_RX]);
911 rsession->value.cs_ts_index = session->value.cs_ts_index;
912 rsession->value.cs_lbi = INDEX_INVALID;
913 rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
914 rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
915 rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
/* Insert the return session without replacing an existing entry */
918 rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
919 2 /* add but don't overwrite */);
/* NOTE(review): the test of rv and the statement executed when RETRY_SNAT is not set are not visible in this view */
922 if (!(rsession_flags & CNAT_SESSION_RETRY_SNAT))
/* NOTE(review): the retry rewrites the return value port and the forward key port, but none of the inputs of the return key set above (session value ports and addresses) change - confirm the retried add can succeed */
925 /* return session add failed, pick a new random src port */
926 rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
927 random_u32 (&sport_seed);
928 if (n_retries++ < 100)
929 goto retry_add_ression;
932 clib_warning ("Could not find a free port after 100 tries");
933 /* translate this packet, but don't create state */
/* Insert the forward session */
938 cnat_bihash_add_del (&cnat_session_db, bkey, 1 /* add */);
940 if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
942 /* is this the first time we've seen this source address */
943 cc = (AF_IP4 == ctx->af ?
944 cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
945 cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
/* NOTE(review): the test on cc and the throttle bookkeeping between the lines below are only partly visible in this view */
953 addr.version = ctx->af;
954 ip46_address_copy (&addr.ip, &session->value.cs_ip[VLIB_RX]);
/* The throttle hash is accessed under the throttle spinlock */
957 clib_spinlock_lock (&cnat_client_db.throttle_lock);
959 p = hash_get_mem (cnat_client_db.throttle_mem, &addr);
963 hash_set_mem (cnat_client_db.throttle_mem, &addr, refcnt);
966 hash_set_mem_alloc (&cnat_client_db.throttle_mem, &addr, 0);
968 clib_spinlock_unlock (&cnat_client_db.throttle_lock);
970 /* fire client create to the main thread */
972 vl_api_rpc_call_main_thread (cnat_client_learn, (u8 *) &addr,
977 /* Refcount reverse session */
978 cnat_client_cnt_session (cc);
/**
 * Generic frame handler shared by the cnat nodes.
 * For each buffer of the frame: build the session lookup key, search the
 * session table, and call cnat_sub with the search result and the session
 * (the lookup key when the search result is non-zero, the stored value
 * otherwise). The value returned by cnat_sub is recorded as the next index
 * of the buffer, and the whole frame is enqueued at the end.
 * A four-wide path precomputes keys and hashes for the following four
 * buffers and issues prefetches; a one-by-one path handles single buffers.
 * Returns the number of vectors in the frame.
 * NOTE(review): the return type, the last parameter line (do_trace is used
 * below), the loop headers and counter updates, and the value arguments of
 * the hashed searches are not visible in this view.
 */
985 cnat_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
986 vlib_frame_t *frame, cnat_node_sub_t cnat_sub,
987 ip_address_family_t af, cnat_session_location_t cs_loc,
990 u32 n_left, *from, thread_index;
991 vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
992 vlib_buffer_t **b = bufs;
993 u16 nexts[VLIB_FRAME_SIZE], *next;
996 thread_index = vm->thread_index;
997 from = vlib_frame_vector_args (frame);
998 n_left = frame->n_vectors;
/* Resolve all buffer indices of the frame to buffer pointers up front */
1000 vlib_get_buffers (vm, from, bufs, n_left);
1001 now = vlib_time_now (vm);
1002 cnat_session_t *session[4];
1003 cnat_bihash_kv_t bkey[4], bvalue[4];
/* Context shared by all cnat_sub invocations for this frame */
1007 cnat_node_ctx_t ctx = { now, thread_index, af, do_trace };
/* NOTE(review): the guard ensuring at least four buffers are available is not visible in this view */
1011 /* Kickstart our state */
1012 cnat_session_make_key (b[3], af, cs_loc, &bkey[3]);
1013 cnat_session_make_key (b[2], af, cs_loc, &bkey[2]);
1014 cnat_session_make_key (b[1], af, cs_loc, &bkey[1]);
1015 cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
1017 hash[3] = cnat_bihash_hash (&bkey[3]);
1018 hash[2] = cnat_bihash_hash (&bkey[2]);
1019 hash[1] = cnat_bihash_hash (&bkey[1]);
1020 hash[0] = cnat_bihash_hash (&bkey[0]);
/* NOTE(review): reads b[8..11]; the guard on the remaining buffer count is not visible in this view */
1027 vlib_prefetch_buffer_header (b[11], LOAD);
1028 vlib_prefetch_buffer_header (b[10], LOAD);
1029 vlib_prefetch_buffer_header (b[9], LOAD);
1030 vlib_prefetch_buffer_header (b[8], LOAD);
/* Search with the precomputed hash, then hand the packet to cnat_sub */
1033 rv[3] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[3], &bkey[3],
1035 session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
1036 next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
1038 rv[2] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[2], &bkey[2],
1040 session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
1041 next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
1043 rv[1] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[1], &bkey[1],
1045 session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
1046 next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
1048 rv[0] = cnat_bihash_search_i2_hash (&cnat_session_db, hash[0], &bkey[0],
1050 session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
1051 next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
/* Prepare keys and hashes of the next four buffers, reusing the bkey slots */
1053 cnat_session_make_key (b[7], af, cs_loc, &bkey[3]);
1054 cnat_session_make_key (b[6], af, cs_loc, &bkey[2]);
1055 cnat_session_make_key (b[5], af, cs_loc, &bkey[1]);
1056 cnat_session_make_key (b[4], af, cs_loc, &bkey[0]);
1058 hash[3] = cnat_bihash_hash (&bkey[3]);
1059 hash[2] = cnat_bihash_hash (&bkey[2]);
1060 hash[1] = cnat_bihash_hash (&bkey[1]);
1061 hash[0] = cnat_bihash_hash (&bkey[0]);
/* Prefetch the hash buckets, then their data, ahead of the next searches */
1063 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[3]);
1064 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[2]);
1065 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[1]);
1066 cnat_bihash_prefetch_bucket (&cnat_session_db, hash[0]);
1068 cnat_bihash_prefetch_data (&cnat_session_db, hash[3]);
1069 cnat_bihash_prefetch_data (&cnat_session_db, hash[2]);
1070 cnat_bihash_prefetch_data (&cnat_session_db, hash[1]);
1071 cnat_bihash_prefetch_data (&cnat_session_db, hash[0]);
/* One-by-one path: the key is rebuilt and searched without a precomputed hash */
1080 cnat_session_make_key (b[0], af, cs_loc, &bkey[0]);
1081 rv[0] = cnat_bihash_search_i2 (&cnat_session_db, &bkey[0], &bvalue[0]);
1083 session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
1084 next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
/* Hand every buffer of the frame to its recorded next index */
1091 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1093 return frame->n_vectors;
1097 * fd.io coding-style-patch-verification: ON
1100 * eval: (c-set-style "gnu")