cnat: Add support for SNat ICMP
[vpp.git] / src / plugins / cnat / cnat_node.h
1 /*
2  * Copyright (c) 2020 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #ifndef __CNAT_NODE_H__
17 #define __CNAT_NODE_H__
18
19 #include <vlibmemory/api.h>
20 #include <cnat/cnat_session.h>
21 #include <cnat/cnat_client.h>
22
23 typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
24                                   vlib_node_runtime_t * node,
25                                   vlib_buffer_t * b,
26                                   cnat_node_ctx_t * ctx, int rv,
27                                   cnat_session_t * session);
28
29 static_always_inline u8
30 icmp_type_is_error_message (u8 icmp_type)
31 {
32   switch (icmp_type)
33     {
34     case ICMP4_destination_unreachable:
35     case ICMP4_time_exceeded:
36     case ICMP4_parameter_problem:
37     case ICMP4_source_quench:
38     case ICMP4_redirect:
39     case ICMP4_alternate_host_address:
40       return 1;
41     }
42   return 0;
43 }
44
45 static_always_inline u8
46 icmp_type_is_echo (u8 icmp_type)
47 {
48   switch (icmp_type)
49     {
50     case ICMP4_echo_request:
51     case ICMP4_echo_reply:
52       return 1;
53     }
54   return 0;
55 }
56
57 static_always_inline u8
58 icmp6_type_is_echo (u8 icmp_type)
59 {
60   switch (icmp_type)
61     {
62     case ICMP6_echo_request:
63     case ICMP6_echo_reply:
64       return 1;
65     }
66   return 0;
67 }
68
69 static_always_inline u8
70 icmp6_type_is_error_message (u8 icmp_type)
71 {
72   switch (icmp_type)
73     {
74     case ICMP6_destination_unreachable:
75     case ICMP6_time_exceeded:
76     case ICMP6_parameter_problem:
77       return 1;
78     }
79   return 0;
80 }
81
82 static_always_inline u8
83 cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
84 {
85   return ((a1->as_u64[0] == a2->as_u64[0])
86           && (a1->as_u64[1] == a2->as_u64[1]));
87 }
88
89 /**
90  * Inline translation functions
91  */
92
93 static_always_inline u8
94 has_ip6_address (ip6_address_t * a)
95 {
96   return ((0 != a->as_u64[0]) || (0 != a->as_u64[1]));
97 }
98
99 static_always_inline void
100 cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
101                        ip_csum_t * sum,
102                        ip4_address_t new_addr[VLIB_N_DIR],
103                        u16 new_port[VLIB_N_DIR])
104 {
105   u16 old_port[VLIB_N_DIR];
106   ip4_address_t old_addr[VLIB_N_DIR];
107
108   /* Fastpath no checksum */
109   if (PREDICT_TRUE (0 == *sum))
110     {
111       udp->dst_port = new_port[VLIB_TX];
112       udp->src_port = new_port[VLIB_RX];
113       return;
114     }
115
116   old_port[VLIB_TX] = udp->dst_port;
117   old_port[VLIB_RX] = udp->src_port;
118   old_addr[VLIB_TX] = ip4->dst_address;
119   old_addr[VLIB_RX] = ip4->src_address;
120
121   if (new_addr[VLIB_TX].as_u32)
122     {
123       *sum =
124         ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
125                         new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
126     }
127   if (new_port[VLIB_TX])
128     {
129       udp->dst_port = new_port[VLIB_TX];
130       *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
131                              ip4_header_t /* cheat */ ,
132                              length /* changed member */ );
133     }
134   if (new_addr[VLIB_RX].as_u32)
135     {
136       *sum =
137         ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
138                         new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
139     }
140   if (new_port[VLIB_RX])
141     {
142       udp->src_port = new_port[VLIB_RX];
143       *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
144                              ip4_header_t /* cheat */ ,
145                              length /* changed member */ );
146     }
147 }
148
149 static_always_inline void
150 cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
151 {
152   ip4_address_t old_addr[VLIB_N_DIR];
153   ip_csum_t sum;
154
155   old_addr[VLIB_TX] = ip4->dst_address;
156   old_addr[VLIB_RX] = ip4->src_address;
157
158   sum = ip4->checksum;
159   if (new_addr[VLIB_TX].as_u32)
160     {
161       ip4->dst_address = new_addr[VLIB_TX];
162       sum =
163         ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
164                         new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
165     }
166   if (new_addr[VLIB_RX].as_u32)
167     {
168       ip4->src_address = new_addr[VLIB_RX];
169       sum =
170         ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
171                         new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
172     }
173   ip4->checksum = ip_csum_fold (sum);
174 }
175
176 static_always_inline void
177 cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
178 {
179   cnat_main_t *cm = &cnat_main;
180   if (PREDICT_FALSE (tcp_fin (tcp)))
181     {
182       cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
183     }
184
185   if (PREDICT_FALSE (tcp_rst (tcp)))
186     {
187       cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
188     }
189
190   if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
191     {
192       cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
193     }
194 }
195
196 static_always_inline void
197 cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
198                              ip4_address_t new_addr[VLIB_N_DIR],
199                              u16 new_port[VLIB_N_DIR])
200 {
201   ip_csum_t sum;
202   u16 old_port;
203   cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
204
205   cnat_ip4_translate_l3 (ip4, new_addr);
206   old_port = echo->identifier;
207   echo->identifier = new_port[VLIB_RX];
208
209   sum = icmp->checksum;
210   sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
211                         ip4_header_t /* cheat */ ,
212                         length /* changed member */ );
213
214   icmp->checksum = ip_csum_fold (sum);
215 }
216
217 static_always_inline void
218 cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
219                               icmp46_header_t * icmp,
220                               ip4_address_t outer_new_addr[VLIB_N_DIR],
221                               u16 outer_new_port[VLIB_N_DIR],
222                               u8 snat_outer_ip)
223 {
224   ip4_address_t new_addr[VLIB_N_DIR];
225   ip4_address_t old_addr[VLIB_N_DIR];
226   u16 new_port[VLIB_N_DIR];
227   u16 old_port[VLIB_N_DIR];
228   ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
229
230   ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
231   udp_header_t *udp = (udp_header_t *) (ip4 + 1);
232   tcp_header_t *tcp = (tcp_header_t *) udp;
233
234   /* Swap inner ports */
235   new_addr[VLIB_TX] = outer_new_addr[VLIB_RX];
236   new_addr[VLIB_RX] = outer_new_addr[VLIB_TX];
237   new_port[VLIB_TX] = outer_new_port[VLIB_RX];
238   new_port[VLIB_RX] = outer_new_port[VLIB_TX];
239
240   old_addr[VLIB_TX] = ip4->dst_address;
241   old_addr[VLIB_RX] = ip4->src_address;
242   old_port[VLIB_RX] = udp->src_port;
243   old_port[VLIB_TX] = udp->dst_port;
244
245   sum = icmp->checksum;
246   old_ip_sum = ip4->checksum;
247
248   /* translate outer ip. */
249   if (!snat_outer_ip)
250     outer_new_addr[VLIB_RX] = outer_ip4->src_address;
251   cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
252
253   if (ip4->protocol == IP_PROTOCOL_TCP)
254     {
255       inner_l4_old_sum = inner_l4_sum = tcp->checksum;
256       cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
257       tcp->checksum = ip_csum_fold (inner_l4_sum);
258     }
259   else if (ip4->protocol == IP_PROTOCOL_UDP)
260     {
261       inner_l4_old_sum = inner_l4_sum = udp->checksum;
262       cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
263       udp->checksum = ip_csum_fold (inner_l4_sum);
264     }
265   else
266     return;
267
268   /* UDP/TCP checksum changed */
269   sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
270                         ip4_header_t, checksum);
271
272   /* UDP/TCP Ports changed */
273   if (old_port[VLIB_TX] && new_port[VLIB_TX])
274     sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
275                           ip4_header_t /* cheat */ ,
276                           length /* changed member */ );
277
278   if (old_port[VLIB_RX] && new_port[VLIB_RX])
279     sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
280                           ip4_header_t /* cheat */ ,
281                           length /* changed member */ );
282
283
284   cnat_ip4_translate_l3 (ip4, new_addr);
285   ip_csum_t new_ip_sum = ip4->checksum;
286   /* IP checksum changed */
287   sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
288
289   /* IP src/dst addr changed */
290   if (new_addr[VLIB_TX].as_u32)
291     sum =
292       ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
293                       ip4_header_t, dst_address);
294
295   if (new_addr[VLIB_RX].as_u32)
296     sum =
297       ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
298                       ip4_header_t, src_address);
299
300   icmp->checksum = ip_csum_fold (sum);
301 }
302
303 static_always_inline void
304 cnat_translation_ip4 (const cnat_session_t * session,
305                       ip4_header_t * ip4, udp_header_t * udp)
306 {
307   tcp_header_t *tcp = (tcp_header_t *) udp;
308   ip4_address_t new_addr[VLIB_N_DIR];
309   u16 new_port[VLIB_N_DIR];
310
311   new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4;
312   new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4;
313   new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
314   new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
315
316   if (ip4->protocol == IP_PROTOCOL_TCP)
317     {
318       ip_csum_t sum = tcp->checksum;
319       cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
320       tcp->checksum = ip_csum_fold (sum);
321       cnat_ip4_translate_l3 (ip4, new_addr);
322       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
323     }
324   else if (ip4->protocol == IP_PROTOCOL_UDP)
325     {
326       ip_csum_t sum = udp->checksum;
327       cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
328       udp->checksum = ip_csum_fold (sum);
329       cnat_ip4_translate_l3 (ip4, new_addr);
330     }
331   else if (ip4->protocol == IP_PROTOCOL_ICMP)
332     {
333       icmp46_header_t *icmp = (icmp46_header_t *) udp;
334       if (icmp_type_is_error_message (icmp->type))
335         {
336           /* SNAT only if src_addr was translated */
337           u8 snat_outer_ip =
338             (ip4->src_address.as_u32 ==
339              session->key.cs_ip[VLIB_RX].ip4.as_u32);
340           cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
341                                         snat_outer_ip);
342         }
343       else if (icmp_type_is_echo (icmp->type))
344         cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
345     }
346 }
347
348 static_always_inline void
349 cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
350 {
351   if (has_ip6_address (&new_addr[VLIB_TX]))
352     ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
353   if (has_ip6_address (&new_addr[VLIB_RX]))
354     ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
355 }
356
357 static_always_inline void
358 cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
359                        ip_csum_t * sum,
360                        ip6_address_t new_addr[VLIB_N_DIR],
361                        u16 new_port[VLIB_N_DIR])
362 {
363   u16 old_port[VLIB_N_DIR];
364   ip6_address_t old_addr[VLIB_N_DIR];
365
366   /* Fastpath no checksum */
367   if (PREDICT_TRUE (0 == *sum))
368     {
369       udp->dst_port = new_port[VLIB_TX];
370       udp->src_port = new_port[VLIB_RX];
371       return;
372     }
373
374   old_port[VLIB_TX] = udp->dst_port;
375   old_port[VLIB_RX] = udp->src_port;
376   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
377   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
378
379   if (has_ip6_address (&new_addr[VLIB_TX]))
380     {
381       *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
382       *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
383       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
384       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
385     }
386
387   if (new_port[VLIB_TX])
388     {
389       udp->dst_port = new_port[VLIB_TX];
390       *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
391                              ip4_header_t /* cheat */ ,
392                              length /* changed member */ );
393     }
394   if (has_ip6_address (&new_addr[VLIB_RX]))
395     {
396       *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
397       *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
398       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
399       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
400     }
401
402   if (new_port[VLIB_RX])
403     {
404       udp->src_port = new_port[VLIB_RX];
405       *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
406                              ip4_header_t /* cheat */ ,
407                              length /* changed member */ );
408     }
409 }
410
411 static_always_inline void
412 cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
413                              ip6_address_t new_addr[VLIB_N_DIR],
414                              u16 new_port[VLIB_N_DIR])
415 {
416   cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
417   ip6_address_t old_addr[VLIB_N_DIR];
418   ip_csum_t sum;
419   u16 old_port;
420   old_port = echo->identifier;
421   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
422   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
423
424   sum = icmp->checksum;
425
426   cnat_ip6_translate_l3 (ip6, new_addr);
427   if (has_ip6_address (&new_addr[VLIB_TX]))
428     {
429       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
430       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
431       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
432       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
433     }
434
435   if (has_ip6_address (&new_addr[VLIB_RX]))
436     {
437       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
438       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
439       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
440       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
441     }
442
443   echo->identifier = new_port[VLIB_RX];
444   sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
445                         ip4_header_t /* cheat */ ,
446                         length /* changed member */ );
447
448   icmp->checksum = ip_csum_fold (sum);
449 }
450
451 static_always_inline void
452 cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
453                               icmp46_header_t * icmp,
454                               ip6_address_t outer_new_addr[VLIB_N_DIR],
455                               u16 outer_new_port[VLIB_N_DIR],
456                               u8 snat_outer_ip)
457 {
458   ip6_address_t new_addr[VLIB_N_DIR];
459   ip6_address_t old_addr[VLIB_N_DIR];
460   ip6_address_t outer_old_addr[VLIB_N_DIR];
461   u16 new_port[VLIB_N_DIR];
462   u16 old_port[VLIB_N_DIR];
463   ip_csum_t sum, inner_l4_sum, inner_l4_old_sum;
464
465   if (!icmp6_type_is_error_message (icmp->type))
466     return;
467
468   ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2);
469   udp_header_t *udp = (udp_header_t *) (ip6 + 1);
470   tcp_header_t *tcp = (tcp_header_t *) udp;
471
472   /* Swap inner ports */
473   ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]);
474   ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]);
475   new_port[VLIB_TX] = outer_new_port[VLIB_RX];
476   new_port[VLIB_RX] = outer_new_port[VLIB_TX];
477
478   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
479   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
480   old_port[VLIB_RX] = udp->src_port;
481   old_port[VLIB_TX] = udp->dst_port;
482
483   sum = icmp->checksum;
484   /* Translate outer ip */
485   ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address);
486   ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address);
487   if (!snat_outer_ip)
488     ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
489   cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
490   if (has_ip6_address (&outer_new_addr[VLIB_TX]))
491     {
492       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
493       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
494       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
495       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
496     }
497
498   if (has_ip6_address (&outer_new_addr[VLIB_RX]))
499     {
500       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
501       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
502       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
503       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
504     }
505
506   /* Translate inner TCP / UDP */
507   if (ip6->protocol == IP_PROTOCOL_TCP)
508     {
509       inner_l4_old_sum = inner_l4_sum = tcp->checksum;
510       cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
511       tcp->checksum = ip_csum_fold (inner_l4_sum);
512     }
513   else if (ip6->protocol == IP_PROTOCOL_UDP)
514     {
515       inner_l4_old_sum = inner_l4_sum = udp->checksum;
516       cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
517       udp->checksum = ip_csum_fold (inner_l4_sum);
518     }
519   else
520     return;
521
522   /* UDP/TCP checksum changed */
523   sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
524                         ip4_header_t /* cheat */ ,
525                         checksum);
526
527   /* UDP/TCP Ports changed */
528   if (old_port[VLIB_TX] && new_port[VLIB_TX])
529     sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
530                           ip4_header_t /* cheat */ ,
531                           length /* changed member */ );
532
533   if (old_port[VLIB_RX] && new_port[VLIB_RX])
534     sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
535                           ip4_header_t /* cheat */ ,
536                           length /* changed member */ );
537
538
539   cnat_ip6_translate_l3 (ip6, new_addr);
540   /* IP src/dst addr changed */
541   if (has_ip6_address (&new_addr[VLIB_TX]))
542     {
543       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
544       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
545       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
546       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
547     }
548
549   if (has_ip6_address (&new_addr[VLIB_RX]))
550     {
551       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
552       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
553       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
554       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
555     }
556
557   icmp->checksum = ip_csum_fold (sum);
558 }
559
560 static_always_inline void
561 cnat_translation_ip6 (const cnat_session_t * session,
562                       ip6_header_t * ip6, udp_header_t * udp)
563 {
564   tcp_header_t *tcp = (tcp_header_t *) udp;
565   ip6_address_t new_addr[VLIB_N_DIR];
566   u16 new_port[VLIB_N_DIR];
567
568   ip6_address_copy (&new_addr[VLIB_TX], &session->value.cs_ip[VLIB_TX].ip6);
569   ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6);
570   new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
571   new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
572
573   if (ip6->protocol == IP_PROTOCOL_TCP)
574     {
575       ip_csum_t sum = tcp->checksum;
576       cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
577       tcp->checksum = ip_csum_fold (sum);
578       cnat_ip6_translate_l3 (ip6, new_addr);
579       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
580     }
581   else if (ip6->protocol == IP_PROTOCOL_UDP)
582     {
583       ip_csum_t sum = udp->checksum;
584       cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
585       udp->checksum = ip_csum_fold (sum);
586       cnat_ip6_translate_l3 (ip6, new_addr);
587     }
588   else if (ip6->protocol == IP_PROTOCOL_ICMP6)
589     {
590       icmp46_header_t *icmp = (icmp46_header_t *) udp;
591       if (icmp6_type_is_error_message (icmp->type))
592         {
593           /* SNAT only if src_addr was translated */
594           u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
595                                               &session->key.
596                                               cs_ip[VLIB_RX].ip6);
597           cnat_translation_icmp6_error (ip6, icmp, new_addr, new_port,
598                                         snat_outer_ip);
599         }
600       else if (icmp6_type_is_echo (icmp->type))
601         cnat_translation_icmp6_echo (ip6, icmp, new_addr, new_port);
602     }
603 }
604
605 static_always_inline void
606 cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
607                        clib_bihash_kv_40_48_t * bkey)
608 {
609   udp_header_t *udp;
610   cnat_session_t *session = (cnat_session_t *) bkey;
611   session->key.cs_af = af;
612   session->key.__cs_pad[0] = 0;
613   session->key.__cs_pad[1] = 0;
614   if (AF_IP4 == af)
615     {
616       ip4_header_t *ip4;
617       ip4 = vlib_buffer_get_current (b);
618       if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
619         {
620           icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
621           if (icmp_type_is_error_message (icmp->type))
622             {
623               ip4 = (ip4_header_t *) (icmp + 2);        /* Use inner packet */
624               udp = (udp_header_t *) (ip4 + 1);
625               /* Swap dst & src for search as ICMP payload is reversed */
626               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
627                                     &ip4->dst_address);
628               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
629                                     &ip4->src_address);
630               session->key.cs_proto = ip4->protocol;
631               session->key.cs_port[VLIB_TX] = udp->src_port;
632               session->key.cs_port[VLIB_RX] = udp->dst_port;
633             }
634           else if (icmp_type_is_echo (icmp->type))
635             {
636               cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
637               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
638                                     &ip4->dst_address);
639               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
640                                     &ip4->src_address);
641               session->key.cs_proto = ip4->protocol;
642               session->key.cs_port[VLIB_TX] = echo->identifier;
643               session->key.cs_port[VLIB_RX] = echo->identifier;
644             }
645           else
646             goto error;
647         }
648       else
649         {
650           udp = (udp_header_t *) (ip4 + 1);
651           ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
652                                 &ip4->dst_address);
653           ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
654                                 &ip4->src_address);
655           session->key.cs_proto = ip4->protocol;
656           session->key.cs_port[VLIB_RX] = udp->src_port;
657           session->key.cs_port[VLIB_TX] = udp->dst_port;
658         }
659
660     }
661   else
662     {
663       ip6_header_t *ip6;
664       ip6 = vlib_buffer_get_current (b);
665       if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
666         {
667           icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
668           if (icmp6_type_is_error_message (icmp->type))
669             {
670               ip6 = (ip6_header_t *) (icmp + 2);        /* Use inner packet */
671               udp = (udp_header_t *) (ip6 + 1);
672               /* Swap dst & src for search as ICMP payload is reversed */
673               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
674                                     &ip6->dst_address);
675               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
676                                     &ip6->src_address);
677               session->key.cs_proto = ip6->protocol;
678               session->key.cs_port[VLIB_TX] = udp->src_port;
679               session->key.cs_port[VLIB_RX] = udp->dst_port;
680             }
681           else if (icmp6_type_is_echo (icmp->type))
682             {
683               cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
684               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
685                                     &ip6->dst_address);
686               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
687                                     &ip6->src_address);
688               session->key.cs_proto = ip6->protocol;
689               session->key.cs_port[VLIB_TX] = echo->identifier;
690               session->key.cs_port[VLIB_RX] = echo->identifier;
691             }
692           else
693             goto error;
694         }
695       else
696         {
697           udp = (udp_header_t *) (ip6 + 1);
698           ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
699                                 &ip6->dst_address);
700           ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
701                                 &ip6->src_address);
702           session->key.cs_port[VLIB_RX] = udp->src_port;
703           session->key.cs_port[VLIB_TX] = udp->dst_port;
704           session->key.cs_proto = ip6->protocol;
705         }
706     }
707   return;
708
709 error:
710   /* Ensure we dont find anything */
711   session->key.cs_proto = 0;
712   return;
713 }
714
715 /**
716  * Create NAT sessions
717  */
718
719 static_always_inline void
720 cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
721                      u8 rsession_flags)
722 {
723   cnat_client_t *cc;
724   clib_bihash_kv_40_48_t rkey;
725   cnat_session_t *rsession = (cnat_session_t *) & rkey;
726   clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session;
727   clib_bihash_kv_40_48_t rvalue;
728   int rv;
729
730   session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
731   clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1);
732
733   /* is this the first time we've seen this source address */
734   cc = (AF_IP4 == ctx->af ?
735         cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
736         cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
737
738   if (NULL == cc)
739     {
740       u64 r0 = 17;
741       if (AF_IP4 == ctx->af)
742         r0 = (u64) session->value.cs_ip[VLIB_RX].ip4.as_u32;
743       else
744         {
745           r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[0];
746           r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[1];
747         }
748
749       /* Rate limit */
750       if (!throttle_check (&cnat_throttle, ctx->thread_index, r0, ctx->seed))
751         {
752           cnat_learn_arg_t l;
753           l.addr.version = ctx->af;
754           ip46_address_copy (&l.addr.ip, &session->value.cs_ip[VLIB_RX]);
755           /* fire client create to the main thread */
756           vl_api_rpc_call_main_thread (cnat_client_learn,
757                                        (u8 *) & l, sizeof (l));
758         }
759       else
760         {
761           /* Will still need to count those for session refcnt */
762           ip_address_t *addr;
763           clib_spinlock_lock (&cnat_client_db.throttle_pool_lock
764                               [ctx->thread_index]);
765           pool_get (cnat_client_db.throttle_pool[ctx->thread_index], addr);
766           addr->version = ctx->af;
767           ip46_address_copy (&addr->ip, &session->value.cs_ip[VLIB_RX]);
768           clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock
769                                 [ctx->thread_index]);
770         }
771     }
772   else
773     {
774       /* Refcount reverse session */
775       cnat_client_cnt_session (cc);
776     }
777
778   /* create the reverse flow key */
779   ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
780                      &session->value.cs_ip[VLIB_TX]);
781   ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
782                      &session->value.cs_ip[VLIB_RX]);
783   rsession->key.cs_proto = session->key.cs_proto;
784   rsession->key.__cs_pad[0] = 0;
785   rsession->key.__cs_pad[1] = 0;
786   rsession->key.cs_af = ctx->af;
787   rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
788   rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
789
790   /* First search for existing reverse session */
791   rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
792   if (!rv)
793     {
794       /* Reverse session already exists
795          cleanup before creating for refcnts */
796       cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
797       cnat_session_free (found_rsession);
798     }
799   /* add the reverse flow */
800   ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
801                      &session->key.cs_ip[VLIB_TX]);
802   ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
803                      &session->key.cs_ip[VLIB_RX]);
804   rsession->value.cs_ts_index = session->value.cs_ts_index;
805   rsession->value.cs_lbi = INDEX_INVALID;
806   rsession->value.flags = rsession_flags;
807   rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
808   rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
809
810   clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1);
811 }
812
813 always_inline uword
814 cnat_node_inline (vlib_main_t * vm,
815                   vlib_node_runtime_t * node,
816                   vlib_frame_t * frame,
817                   cnat_node_sub_t cnat_sub,
818                   ip_address_family_t af, u8 do_trace)
819 {
820   u32 n_left, *from, thread_index;
821   vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
822   vlib_buffer_t **b = bufs;
823   u16 nexts[VLIB_FRAME_SIZE], *next;
824   f64 now;
825   u64 seed;
826
827   thread_index = vm->thread_index;
828   from = vlib_frame_vector_args (frame);
829   n_left = frame->n_vectors;
830   next = nexts;
831   vlib_get_buffers (vm, from, bufs, n_left);
832   now = vlib_time_now (vm);
833   seed = throttle_seed (&cnat_throttle, thread_index, vlib_time_now (vm));
834   cnat_session_t *session[4];
835   clib_bihash_kv_40_48_t bkey[4], bvalue[4];
836   u64 hash[4];
837   int rv[4];
838
839   cnat_node_ctx_t ctx = { now, seed, thread_index, af, do_trace };
840
841   if (n_left >= 8)
842     {
843       /* Kickstart our state */
844       cnat_session_make_key (b[3], af, &bkey[3]);
845       cnat_session_make_key (b[2], af, &bkey[2]);
846       cnat_session_make_key (b[1], af, &bkey[1]);
847       cnat_session_make_key (b[0], af, &bkey[0]);
848
849       hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
850       hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
851       hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
852       hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
853     }
854
855   while (n_left >= 8)
856     {
857       if (n_left >= 12)
858         {
859           vlib_prefetch_buffer_header (b[11], LOAD);
860           vlib_prefetch_buffer_header (b[10], LOAD);
861           vlib_prefetch_buffer_header (b[9], LOAD);
862           vlib_prefetch_buffer_header (b[8], LOAD);
863         }
864
865       rv[3] =
866         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
867                                                      hash[3], &bkey[3],
868                                                      &bvalue[3]);
869       session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
870       next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
871
872       rv[2] =
873         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
874                                                      hash[2], &bkey[2],
875                                                      &bvalue[2]);
876       session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
877       next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
878
879       rv[1] =
880         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
881                                                      hash[1], &bkey[1],
882                                                      &bvalue[1]);
883       session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
884       next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
885
886       rv[0] =
887         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
888                                                      hash[0], &bkey[0],
889                                                      &bvalue[0]);
890       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
891       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
892
893       cnat_session_make_key (b[7], af, &bkey[3]);
894       cnat_session_make_key (b[6], af, &bkey[2]);
895       cnat_session_make_key (b[5], af, &bkey[1]);
896       cnat_session_make_key (b[4], af, &bkey[0]);
897
898       hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
899       hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
900       hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
901       hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
902
903       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]);
904       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]);
905       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]);
906       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]);
907
908       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]);
909       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]);
910       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]);
911       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]);
912
913       b += 4;
914       next += 4;
915       n_left -= 4;
916     }
917
918   while (n_left > 0)
919     {
920       cnat_session_make_key (b[0], af, &bkey[0]);
921       rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db,
922                                                  &bkey[0], &bvalue[0]);
923
924       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
925       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
926
927       b++;
928       next++;
929       n_left--;
930     }
931
932   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
933
934   return frame->n_vectors;
935 }
936
937 /*
938  * fd.io coding-style-patch-verification: ON
939  *
940  * Local Variables:
941  * eval: (c-set-style "gnu")
942  * End:
943  */
944
945 #endif