cnat: Fix throttle hash & cleanup
[vpp.git] / src / plugins / cnat / cnat_node.h
1 /*
2  * Copyright (c) 2020 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 #ifndef __CNAT_NODE_H__
17 #define __CNAT_NODE_H__
18
19 #include <vlibmemory/api.h>
20 #include <cnat/cnat_session.h>
21 #include <cnat/cnat_client.h>
22 #include <cnat/cnat_inline.h>
23
24 typedef uword (*cnat_node_sub_t) (vlib_main_t * vm,
25                                   vlib_node_runtime_t * node,
26                                   vlib_buffer_t * b,
27                                   cnat_node_ctx_t * ctx, int rv,
28                                   cnat_session_t * session);
29
30 static_always_inline u8
31 icmp_type_is_error_message (u8 icmp_type)
32 {
33   switch (icmp_type)
34     {
35     case ICMP4_destination_unreachable:
36     case ICMP4_time_exceeded:
37     case ICMP4_parameter_problem:
38     case ICMP4_source_quench:
39     case ICMP4_redirect:
40     case ICMP4_alternate_host_address:
41       return 1;
42     }
43   return 0;
44 }
45
46 static_always_inline u8
47 icmp_type_is_echo (u8 icmp_type)
48 {
49   switch (icmp_type)
50     {
51     case ICMP4_echo_request:
52     case ICMP4_echo_reply:
53       return 1;
54     }
55   return 0;
56 }
57
58 static_always_inline u8
59 icmp6_type_is_echo (u8 icmp_type)
60 {
61   switch (icmp_type)
62     {
63     case ICMP6_echo_request:
64     case ICMP6_echo_reply:
65       return 1;
66     }
67   return 0;
68 }
69
70 static_always_inline u8
71 icmp6_type_is_error_message (u8 icmp_type)
72 {
73   switch (icmp_type)
74     {
75     case ICMP6_destination_unreachable:
76     case ICMP6_time_exceeded:
77     case ICMP6_parameter_problem:
78       return 1;
79     }
80   return 0;
81 }
82
83 static_always_inline u8
84 cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
85 {
86   return ((a1->as_u64[0] == a2->as_u64[0])
87           && (a1->as_u64[1] == a2->as_u64[1]));
88 }
89
90 /**
91  * Inline translation functions
92  */
93
94 static_always_inline u8
95 has_ip6_address (ip6_address_t * a)
96 {
97   return ((0 != a->as_u64[0]) || (0 != a->as_u64[1]));
98 }
99
100 static_always_inline void
101 cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
102                        ip_csum_t * sum,
103                        ip4_address_t new_addr[VLIB_N_DIR],
104                        u16 new_port[VLIB_N_DIR])
105 {
106   u16 old_port[VLIB_N_DIR];
107   ip4_address_t old_addr[VLIB_N_DIR];
108
109   /* Fastpath no checksum */
110   if (PREDICT_TRUE (0 == *sum))
111     {
112       udp->dst_port = new_port[VLIB_TX];
113       udp->src_port = new_port[VLIB_RX];
114       return;
115     }
116
117   old_port[VLIB_TX] = udp->dst_port;
118   old_port[VLIB_RX] = udp->src_port;
119   old_addr[VLIB_TX] = ip4->dst_address;
120   old_addr[VLIB_RX] = ip4->src_address;
121
122   if (new_addr[VLIB_TX].as_u32)
123     {
124       *sum =
125         ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
126                         new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
127     }
128   if (new_port[VLIB_TX])
129     {
130       udp->dst_port = new_port[VLIB_TX];
131       *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
132                              ip4_header_t /* cheat */ ,
133                              length /* changed member */ );
134     }
135   if (new_addr[VLIB_RX].as_u32)
136     {
137       *sum =
138         ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
139                         new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
140     }
141   if (new_port[VLIB_RX])
142     {
143       udp->src_port = new_port[VLIB_RX];
144       *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
145                              ip4_header_t /* cheat */ ,
146                              length /* changed member */ );
147     }
148 }
149
150 static_always_inline void
151 cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
152 {
153   ip4_address_t old_addr[VLIB_N_DIR];
154   ip_csum_t sum;
155
156   old_addr[VLIB_TX] = ip4->dst_address;
157   old_addr[VLIB_RX] = ip4->src_address;
158
159   sum = ip4->checksum;
160   if (new_addr[VLIB_TX].as_u32)
161     {
162       ip4->dst_address = new_addr[VLIB_TX];
163       sum =
164         ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
165                         new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
166     }
167   if (new_addr[VLIB_RX].as_u32)
168     {
169       ip4->src_address = new_addr[VLIB_RX];
170       sum =
171         ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
172                         new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
173     }
174   ip4->checksum = ip_csum_fold (sum);
175 }
176
177 static_always_inline void
178 cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
179 {
180   cnat_main_t *cm = &cnat_main;
181   if (PREDICT_FALSE (tcp_fin (tcp)))
182     {
183       cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
184     }
185
186   if (PREDICT_FALSE (tcp_rst (tcp)))
187     {
188       cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
189     }
190
191   if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
192     {
193       cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
194     }
195 }
196
197 static_always_inline void
198 cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
199                              ip4_address_t new_addr[VLIB_N_DIR],
200                              u16 new_port[VLIB_N_DIR])
201 {
202   ip_csum_t sum;
203   u16 old_port;
204   cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
205
206   cnat_ip4_translate_l3 (ip4, new_addr);
207   old_port = echo->identifier;
208   echo->identifier = new_port[VLIB_RX];
209
210   sum = icmp->checksum;
211   sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
212                         ip4_header_t /* cheat */ ,
213                         length /* changed member */ );
214
215   icmp->checksum = ip_csum_fold (sum);
216 }
217
218 static_always_inline void
219 cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
220                               icmp46_header_t * icmp,
221                               ip4_address_t outer_new_addr[VLIB_N_DIR],
222                               u16 outer_new_port[VLIB_N_DIR],
223                               u8 snat_outer_ip)
224 {
225   ip4_address_t new_addr[VLIB_N_DIR];
226   ip4_address_t old_addr[VLIB_N_DIR];
227   u16 new_port[VLIB_N_DIR];
228   u16 old_port[VLIB_N_DIR];
229   ip_csum_t sum, old_ip_sum, inner_l4_sum, inner_l4_old_sum;
230
231   ip4_header_t *ip4 = (ip4_header_t *) (icmp + 2);
232   udp_header_t *udp = (udp_header_t *) (ip4 + 1);
233   tcp_header_t *tcp = (tcp_header_t *) udp;
234
235   /* Swap inner ports */
236   new_addr[VLIB_TX] = outer_new_addr[VLIB_RX];
237   new_addr[VLIB_RX] = outer_new_addr[VLIB_TX];
238   new_port[VLIB_TX] = outer_new_port[VLIB_RX];
239   new_port[VLIB_RX] = outer_new_port[VLIB_TX];
240
241   old_addr[VLIB_TX] = ip4->dst_address;
242   old_addr[VLIB_RX] = ip4->src_address;
243   old_port[VLIB_RX] = udp->src_port;
244   old_port[VLIB_TX] = udp->dst_port;
245
246   sum = icmp->checksum;
247   old_ip_sum = ip4->checksum;
248
249   /* translate outer ip. */
250   if (!snat_outer_ip)
251     outer_new_addr[VLIB_RX] = outer_ip4->src_address;
252   cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
253
254   if (ip4->protocol == IP_PROTOCOL_TCP)
255     {
256       inner_l4_old_sum = inner_l4_sum = tcp->checksum;
257       cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
258       tcp->checksum = ip_csum_fold (inner_l4_sum);
259     }
260   else if (ip4->protocol == IP_PROTOCOL_UDP)
261     {
262       inner_l4_old_sum = inner_l4_sum = udp->checksum;
263       cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
264       udp->checksum = ip_csum_fold (inner_l4_sum);
265     }
266   else
267     return;
268
269   /* UDP/TCP checksum changed */
270   sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
271                         ip4_header_t, checksum);
272
273   /* UDP/TCP Ports changed */
274   if (old_port[VLIB_TX] && new_port[VLIB_TX])
275     sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
276                           ip4_header_t /* cheat */ ,
277                           length /* changed member */ );
278
279   if (old_port[VLIB_RX] && new_port[VLIB_RX])
280     sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
281                           ip4_header_t /* cheat */ ,
282                           length /* changed member */ );
283
284
285   cnat_ip4_translate_l3 (ip4, new_addr);
286   ip_csum_t new_ip_sum = ip4->checksum;
287   /* IP checksum changed */
288   sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
289
290   /* IP src/dst addr changed */
291   if (new_addr[VLIB_TX].as_u32)
292     sum =
293       ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
294                       ip4_header_t, dst_address);
295
296   if (new_addr[VLIB_RX].as_u32)
297     sum =
298       ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
299                       ip4_header_t, src_address);
300
301   icmp->checksum = ip_csum_fold (sum);
302 }
303
304 static_always_inline void
305 cnat_translation_ip4 (const cnat_session_t * session,
306                       ip4_header_t * ip4, udp_header_t * udp)
307 {
308   tcp_header_t *tcp = (tcp_header_t *) udp;
309   ip4_address_t new_addr[VLIB_N_DIR];
310   u16 new_port[VLIB_N_DIR];
311
312   new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4;
313   new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4;
314   new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
315   new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
316
317   if (ip4->protocol == IP_PROTOCOL_TCP)
318     {
319       ip_csum_t sum = tcp->checksum;
320       cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
321       tcp->checksum = ip_csum_fold (sum);
322       cnat_ip4_translate_l3 (ip4, new_addr);
323       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
324     }
325   else if (ip4->protocol == IP_PROTOCOL_UDP)
326     {
327       ip_csum_t sum = udp->checksum;
328       cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
329       udp->checksum = ip_csum_fold (sum);
330       cnat_ip4_translate_l3 (ip4, new_addr);
331     }
332   else if (ip4->protocol == IP_PROTOCOL_ICMP)
333     {
334       icmp46_header_t *icmp = (icmp46_header_t *) udp;
335       if (icmp_type_is_error_message (icmp->type))
336         {
337           /* SNAT only if src_addr was translated */
338           u8 snat_outer_ip =
339             (ip4->src_address.as_u32 ==
340              session->key.cs_ip[VLIB_RX].ip4.as_u32);
341           cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
342                                         snat_outer_ip);
343         }
344       else if (icmp_type_is_echo (icmp->type))
345         cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
346     }
347 }
348
349 static_always_inline void
350 cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
351 {
352   if (has_ip6_address (&new_addr[VLIB_TX]))
353     ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
354   if (has_ip6_address (&new_addr[VLIB_RX]))
355     ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
356 }
357
358 static_always_inline void
359 cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
360                        ip_csum_t * sum,
361                        ip6_address_t new_addr[VLIB_N_DIR],
362                        u16 new_port[VLIB_N_DIR])
363 {
364   u16 old_port[VLIB_N_DIR];
365   ip6_address_t old_addr[VLIB_N_DIR];
366
367   /* Fastpath no checksum */
368   if (PREDICT_TRUE (0 == *sum))
369     {
370       udp->dst_port = new_port[VLIB_TX];
371       udp->src_port = new_port[VLIB_RX];
372       return;
373     }
374
375   old_port[VLIB_TX] = udp->dst_port;
376   old_port[VLIB_RX] = udp->src_port;
377   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
378   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
379
380   if (has_ip6_address (&new_addr[VLIB_TX]))
381     {
382       *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
383       *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
384       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
385       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
386     }
387
388   if (new_port[VLIB_TX])
389     {
390       udp->dst_port = new_port[VLIB_TX];
391       *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
392                              ip4_header_t /* cheat */ ,
393                              length /* changed member */ );
394     }
395   if (has_ip6_address (&new_addr[VLIB_RX]))
396     {
397       *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
398       *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
399       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
400       *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
401     }
402
403   if (new_port[VLIB_RX])
404     {
405       udp->src_port = new_port[VLIB_RX];
406       *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
407                              ip4_header_t /* cheat */ ,
408                              length /* changed member */ );
409     }
410 }
411
412 static_always_inline void
413 cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
414                              ip6_address_t new_addr[VLIB_N_DIR],
415                              u16 new_port[VLIB_N_DIR])
416 {
417   cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
418   ip6_address_t old_addr[VLIB_N_DIR];
419   ip_csum_t sum;
420   u16 old_port;
421   old_port = echo->identifier;
422   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
423   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
424
425   sum = icmp->checksum;
426
427   cnat_ip6_translate_l3 (ip6, new_addr);
428   if (has_ip6_address (&new_addr[VLIB_TX]))
429     {
430       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
431       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
432       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
433       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
434     }
435
436   if (has_ip6_address (&new_addr[VLIB_RX]))
437     {
438       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
439       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
440       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
441       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
442     }
443
444   echo->identifier = new_port[VLIB_RX];
445   sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
446                         ip4_header_t /* cheat */ ,
447                         length /* changed member */ );
448
449   icmp->checksum = ip_csum_fold (sum);
450 }
451
452 static_always_inline void
453 cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
454                               icmp46_header_t * icmp,
455                               ip6_address_t outer_new_addr[VLIB_N_DIR],
456                               u16 outer_new_port[VLIB_N_DIR],
457                               u8 snat_outer_ip)
458 {
459   ip6_address_t new_addr[VLIB_N_DIR];
460   ip6_address_t old_addr[VLIB_N_DIR];
461   ip6_address_t outer_old_addr[VLIB_N_DIR];
462   u16 new_port[VLIB_N_DIR];
463   u16 old_port[VLIB_N_DIR];
464   ip_csum_t sum, inner_l4_sum, inner_l4_old_sum;
465
466   if (!icmp6_type_is_error_message (icmp->type))
467     return;
468
469   ip6_header_t *ip6 = (ip6_header_t *) (icmp + 2);
470   udp_header_t *udp = (udp_header_t *) (ip6 + 1);
471   tcp_header_t *tcp = (tcp_header_t *) udp;
472
473   /* Swap inner ports */
474   ip6_address_copy (&new_addr[VLIB_RX], &outer_new_addr[VLIB_TX]);
475   ip6_address_copy (&new_addr[VLIB_TX], &outer_new_addr[VLIB_RX]);
476   new_port[VLIB_TX] = outer_new_port[VLIB_RX];
477   new_port[VLIB_RX] = outer_new_port[VLIB_TX];
478
479   ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
480   ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
481   old_port[VLIB_RX] = udp->src_port;
482   old_port[VLIB_TX] = udp->dst_port;
483
484   sum = icmp->checksum;
485   /* Translate outer ip */
486   ip6_address_copy (&outer_old_addr[VLIB_TX], &outer_ip6->dst_address);
487   ip6_address_copy (&outer_old_addr[VLIB_RX], &outer_ip6->src_address);
488   if (!snat_outer_ip)
489     ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
490   cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
491   if (has_ip6_address (&outer_new_addr[VLIB_TX]))
492     {
493       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
494       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
495       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
496       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
497     }
498
499   if (has_ip6_address (&outer_new_addr[VLIB_RX]))
500     {
501       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
502       sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
503       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
504       sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
505     }
506
507   /* Translate inner TCP / UDP */
508   if (ip6->protocol == IP_PROTOCOL_TCP)
509     {
510       inner_l4_old_sum = inner_l4_sum = tcp->checksum;
511       cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
512       tcp->checksum = ip_csum_fold (inner_l4_sum);
513     }
514   else if (ip6->protocol == IP_PROTOCOL_UDP)
515     {
516       inner_l4_old_sum = inner_l4_sum = udp->checksum;
517       cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
518       udp->checksum = ip_csum_fold (inner_l4_sum);
519     }
520   else
521     return;
522
523   /* UDP/TCP checksum changed */
524   sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
525                         ip4_header_t /* cheat */ ,
526                         checksum);
527
528   /* UDP/TCP Ports changed */
529   if (old_port[VLIB_TX] && new_port[VLIB_TX])
530     sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
531                           ip4_header_t /* cheat */ ,
532                           length /* changed member */ );
533
534   if (old_port[VLIB_RX] && new_port[VLIB_RX])
535     sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
536                           ip4_header_t /* cheat */ ,
537                           length /* changed member */ );
538
539
540   cnat_ip6_translate_l3 (ip6, new_addr);
541   /* IP src/dst addr changed */
542   if (has_ip6_address (&new_addr[VLIB_TX]))
543     {
544       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
545       sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
546       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
547       sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
548     }
549
550   if (has_ip6_address (&new_addr[VLIB_RX]))
551     {
552       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
553       sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
554       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
555       sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
556     }
557
558   icmp->checksum = ip_csum_fold (sum);
559 }
560
561 static_always_inline void
562 cnat_translation_ip6 (const cnat_session_t * session,
563                       ip6_header_t * ip6, udp_header_t * udp)
564 {
565   tcp_header_t *tcp = (tcp_header_t *) udp;
566   ip6_address_t new_addr[VLIB_N_DIR];
567   u16 new_port[VLIB_N_DIR];
568
569   ip6_address_copy (&new_addr[VLIB_TX], &session->value.cs_ip[VLIB_TX].ip6);
570   ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6);
571   new_port[VLIB_TX] = session->value.cs_port[VLIB_TX];
572   new_port[VLIB_RX] = session->value.cs_port[VLIB_RX];
573
574   if (ip6->protocol == IP_PROTOCOL_TCP)
575     {
576       ip_csum_t sum = tcp->checksum;
577       cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
578       tcp->checksum = ip_csum_fold (sum);
579       cnat_ip6_translate_l3 (ip6, new_addr);
580       cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
581     }
582   else if (ip6->protocol == IP_PROTOCOL_UDP)
583     {
584       ip_csum_t sum = udp->checksum;
585       cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
586       udp->checksum = ip_csum_fold (sum);
587       cnat_ip6_translate_l3 (ip6, new_addr);
588     }
589   else if (ip6->protocol == IP_PROTOCOL_ICMP6)
590     {
591       icmp46_header_t *icmp = (icmp46_header_t *) udp;
592       if (icmp6_type_is_error_message (icmp->type))
593         {
594           /* SNAT only if src_addr was translated */
595           u8 snat_outer_ip = cmp_ip6_address (&ip6->src_address,
596                                               &session->key.
597                                               cs_ip[VLIB_RX].ip6);
598           cnat_translation_icmp6_error (ip6, icmp, new_addr, new_port,
599                                         snat_outer_ip);
600         }
601       else if (icmp6_type_is_echo (icmp->type))
602         cnat_translation_icmp6_echo (ip6, icmp, new_addr, new_port);
603     }
604 }
605
606 static_always_inline void
607 cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af,
608                        clib_bihash_kv_40_48_t * bkey)
609 {
610   udp_header_t *udp;
611   cnat_session_t *session = (cnat_session_t *) bkey;
612   session->key.cs_af = af;
613   session->key.__cs_pad[0] = 0;
614   session->key.__cs_pad[1] = 0;
615   if (AF_IP4 == af)
616     {
617       ip4_header_t *ip4;
618       ip4 = vlib_buffer_get_current (b);
619       if (PREDICT_FALSE (ip4->protocol == IP_PROTOCOL_ICMP))
620         {
621           icmp46_header_t *icmp = (icmp46_header_t *) (ip4 + 1);
622           if (icmp_type_is_error_message (icmp->type))
623             {
624               ip4 = (ip4_header_t *) (icmp + 2);        /* Use inner packet */
625               udp = (udp_header_t *) (ip4 + 1);
626               /* Swap dst & src for search as ICMP payload is reversed */
627               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
628                                     &ip4->dst_address);
629               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
630                                     &ip4->src_address);
631               session->key.cs_proto = ip4->protocol;
632               session->key.cs_port[VLIB_TX] = udp->src_port;
633               session->key.cs_port[VLIB_RX] = udp->dst_port;
634             }
635           else if (icmp_type_is_echo (icmp->type))
636             {
637               cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
638               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
639                                     &ip4->dst_address);
640               ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
641                                     &ip4->src_address);
642               session->key.cs_proto = ip4->protocol;
643               session->key.cs_port[VLIB_TX] = echo->identifier;
644               session->key.cs_port[VLIB_RX] = echo->identifier;
645             }
646           else
647             goto error;
648         }
649       else
650         {
651           udp = (udp_header_t *) (ip4 + 1);
652           ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
653                                 &ip4->dst_address);
654           ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
655                                 &ip4->src_address);
656           session->key.cs_proto = ip4->protocol;
657           session->key.cs_port[VLIB_RX] = udp->src_port;
658           session->key.cs_port[VLIB_TX] = udp->dst_port;
659         }
660
661     }
662   else
663     {
664       ip6_header_t *ip6;
665       ip6 = vlib_buffer_get_current (b);
666       if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
667         {
668           icmp46_header_t *icmp = (icmp46_header_t *) (ip6 + 1);
669           if (icmp6_type_is_error_message (icmp->type))
670             {
671               ip6 = (ip6_header_t *) (icmp + 2);        /* Use inner packet */
672               udp = (udp_header_t *) (ip6 + 1);
673               /* Swap dst & src for search as ICMP payload is reversed */
674               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
675                                     &ip6->dst_address);
676               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
677                                     &ip6->src_address);
678               session->key.cs_proto = ip6->protocol;
679               session->key.cs_port[VLIB_TX] = udp->src_port;
680               session->key.cs_port[VLIB_RX] = udp->dst_port;
681             }
682           else if (icmp6_type_is_echo (icmp->type))
683             {
684               cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
685               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
686                                     &ip6->dst_address);
687               ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
688                                     &ip6->src_address);
689               session->key.cs_proto = ip6->protocol;
690               session->key.cs_port[VLIB_TX] = echo->identifier;
691               session->key.cs_port[VLIB_RX] = echo->identifier;
692             }
693           else
694             goto error;
695         }
696       else
697         {
698           udp = (udp_header_t *) (ip6 + 1);
699           ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX],
700                                 &ip6->dst_address);
701           ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX],
702                                 &ip6->src_address);
703           session->key.cs_port[VLIB_RX] = udp->src_port;
704           session->key.cs_port[VLIB_TX] = udp->dst_port;
705           session->key.cs_proto = ip6->protocol;
706         }
707     }
708   return;
709
710 error:
711   /* Ensure we dont find anything */
712   session->key.cs_proto = 0;
713   return;
714 }
715
716 /**
717  * Create NAT sessions
718  */
719
720 static_always_inline void
721 cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx,
722                      u8 rsession_flags)
723 {
724   cnat_client_t *cc;
725   clib_bihash_kv_40_48_t rkey;
726   cnat_session_t *rsession = (cnat_session_t *) & rkey;
727   clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session;
728   clib_bihash_kv_40_48_t rvalue;
729   int rv;
730
731   session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
732   clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1);
733
734   /* is this the first time we've seen this source address */
735   cc = (AF_IP4 == ctx->af ?
736         cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) :
737         cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6));
738
739   if (NULL == cc)
740     {
741       ip_address_t addr;
742       uword *p;
743       u32 refcnt;
744
745       addr.version = ctx->af;
746       ip46_address_copy (&addr.ip, &session->value.cs_ip[VLIB_RX]);
747
748       /* Throttle */
749       clib_spinlock_lock (&cnat_client_db.throttle_lock);
750
751       p = hash_get_mem (cnat_client_db.throttle_mem, &addr);
752       if (p)
753         {
754           refcnt = p[0] + 1;
755           hash_set_mem (cnat_client_db.throttle_mem, &addr, refcnt);
756         }
757       else
758         hash_set_mem_alloc (&cnat_client_db.throttle_mem, &addr, 0);
759
760       clib_spinlock_unlock (&cnat_client_db.throttle_lock);
761
762       /* fire client create to the main thread */
763       if (!p)
764         vl_api_rpc_call_main_thread (cnat_client_learn, (u8 *) &addr,
765                                      sizeof (addr));
766     }
767   else
768     {
769       /* Refcount reverse session */
770       cnat_client_cnt_session (cc);
771     }
772
773   /* create the reverse flow key */
774   ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
775                      &session->value.cs_ip[VLIB_TX]);
776   ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
777                      &session->value.cs_ip[VLIB_RX]);
778   rsession->key.cs_proto = session->key.cs_proto;
779   rsession->key.__cs_pad[0] = 0;
780   rsession->key.__cs_pad[1] = 0;
781   rsession->key.cs_af = ctx->af;
782   rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
783   rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
784
785   /* First search for existing reverse session */
786   rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue);
787   if (!rv)
788     {
789       /* Reverse session already exists
790          cleanup before creating for refcnts */
791       cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
792       cnat_session_free (found_rsession);
793     }
794   /* add the reverse flow */
795   ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
796                      &session->key.cs_ip[VLIB_TX]);
797   ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
798                      &session->key.cs_ip[VLIB_RX]);
799   rsession->value.cs_ts_index = session->value.cs_ts_index;
800   rsession->value.cs_lbi = INDEX_INVALID;
801   rsession->value.flags = rsession_flags;
802   rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
803   rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
804
805   clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1);
806 }
807
808 always_inline uword
809 cnat_node_inline (vlib_main_t * vm,
810                   vlib_node_runtime_t * node,
811                   vlib_frame_t * frame,
812                   cnat_node_sub_t cnat_sub,
813                   ip_address_family_t af, u8 do_trace)
814 {
815   u32 n_left, *from, thread_index;
816   vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
817   vlib_buffer_t **b = bufs;
818   u16 nexts[VLIB_FRAME_SIZE], *next;
819   f64 now;
820
821   thread_index = vm->thread_index;
822   from = vlib_frame_vector_args (frame);
823   n_left = frame->n_vectors;
824   next = nexts;
825   vlib_get_buffers (vm, from, bufs, n_left);
826   now = vlib_time_now (vm);
827   cnat_session_t *session[4];
828   clib_bihash_kv_40_48_t bkey[4], bvalue[4];
829   u64 hash[4];
830   int rv[4];
831
832   cnat_node_ctx_t ctx = { now, thread_index, af, do_trace };
833
834   if (n_left >= 8)
835     {
836       /* Kickstart our state */
837       cnat_session_make_key (b[3], af, &bkey[3]);
838       cnat_session_make_key (b[2], af, &bkey[2]);
839       cnat_session_make_key (b[1], af, &bkey[1]);
840       cnat_session_make_key (b[0], af, &bkey[0]);
841
842       hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
843       hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
844       hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
845       hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
846     }
847
848   while (n_left >= 8)
849     {
850       if (n_left >= 12)
851         {
852           vlib_prefetch_buffer_header (b[11], LOAD);
853           vlib_prefetch_buffer_header (b[10], LOAD);
854           vlib_prefetch_buffer_header (b[9], LOAD);
855           vlib_prefetch_buffer_header (b[8], LOAD);
856         }
857
858       rv[3] =
859         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
860                                                      hash[3], &bkey[3],
861                                                      &bvalue[3]);
862       session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]);
863       next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]);
864
865       rv[2] =
866         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
867                                                      hash[2], &bkey[2],
868                                                      &bvalue[2]);
869       session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]);
870       next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]);
871
872       rv[1] =
873         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
874                                                      hash[1], &bkey[1],
875                                                      &bvalue[1]);
876       session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]);
877       next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]);
878
879       rv[0] =
880         clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db,
881                                                      hash[0], &bkey[0],
882                                                      &bvalue[0]);
883       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
884       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
885
886       cnat_session_make_key (b[7], af, &bkey[3]);
887       cnat_session_make_key (b[6], af, &bkey[2]);
888       cnat_session_make_key (b[5], af, &bkey[1]);
889       cnat_session_make_key (b[4], af, &bkey[0]);
890
891       hash[3] = clib_bihash_hash_40_48 (&bkey[3]);
892       hash[2] = clib_bihash_hash_40_48 (&bkey[2]);
893       hash[1] = clib_bihash_hash_40_48 (&bkey[1]);
894       hash[0] = clib_bihash_hash_40_48 (&bkey[0]);
895
896       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]);
897       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]);
898       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]);
899       clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]);
900
901       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]);
902       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]);
903       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]);
904       clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]);
905
906       b += 4;
907       next += 4;
908       n_left -= 4;
909     }
910
911   while (n_left > 0)
912     {
913       cnat_session_make_key (b[0], af, &bkey[0]);
914       rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db,
915                                                  &bkey[0], &bvalue[0]);
916
917       session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]);
918       next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]);
919
920       b++;
921       next++;
922       n_left--;
923     }
924
925   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
926
927   return frame->n_vectors;
928 }
929
930 /*
931  * fd.io coding-style-patch-verification: ON
932  *
933  * Local Variables:
934  * eval: (c-set-style "gnu")
935  * End:
936  */
937
938 #endif