nat: tweak rfc7857 tcp connection tracking
[vpp.git] / src / plugins / nat / nat44-ed / nat44_ed_inlines.h
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15
16 /**
17  * @brief The NAT inline functions
18  */
19
20 #ifndef __included_nat44_ed_inlines_h__
21 #define __included_nat44_ed_inlines_h__
22
23 #include <float.h>
24 #include <vppinfra/clib.h>
25 #include <vnet/fib/ip4_fib.h>
26
27 #include <nat/lib/log.h>
28 #include <nat/lib/ipfix_logging.h>
29 #include <nat/nat44-ed/nat44_ed.h>
30
31 always_inline void
32 init_ed_k (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
33            u16 r_port, u32 fib_index, ip_protocol_t proto)
34 {
35   kv->key[0] = (u64) r_addr << 32 | l_addr;
36   kv->key[1] =
37     (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto;
38 }
39
40 always_inline void
41 init_ed_kv (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
42             u16 r_port, u32 fib_index, u8 proto, u32 thread_index,
43             u32 session_index)
44 {
45   init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto);
46   kv->value = (u64) thread_index << 32 | session_index;
47 }
48
49 always_inline void
50 nat44_ed_sm_init_i2o_kv (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
51                          u32 fib_index, u8 proto, u32 sm_index)
52 {
53   return init_ed_kv (kv, addr, port, 0, 0, fib_index, proto, 0, sm_index);
54 }
55
56 always_inline void
57 nat44_ed_sm_init_o2i_kv (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
58                          u32 fib_index, u8 proto, u32 sm_index)
59 {
60   return init_ed_kv (kv, 0, 0, e_addr, e_port, fib_index, proto, 0, sm_index);
61 }
62
63 always_inline void
64 nat44_ed_sm_init_i2o_k (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
65                         u32 fib_index, u8 proto)
66 {
67   return nat44_ed_sm_init_i2o_kv (kv, addr, port, fib_index, proto, 0);
68 }
69
70 always_inline void
71 nat44_ed_sm_init_o2i_k (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
72                         u32 fib_index, u8 proto)
73 {
74   return nat44_ed_sm_init_o2i_kv (kv, e_addr, e_port, fib_index, proto, 0);
75 }
76
77 always_inline u32
78 ed_value_get_thread_index (clib_bihash_kv_16_8_t *value)
79 {
80   return value->value >> 32;
81 }
82
83 always_inline u32
84 ed_value_get_session_index (clib_bihash_kv_16_8_t *value)
85 {
86   return value->value & ~(u32) 0;
87 }
88
89 always_inline void
90 split_ed_kv (clib_bihash_kv_16_8_t *kv, ip4_address_t *l_addr,
91              ip4_address_t *r_addr, u8 *proto, u32 *fib_index, u16 *l_port,
92              u16 *r_port)
93 {
94   if (l_addr)
95     {
96       l_addr->as_u32 = kv->key[0] & (u32) ~0;
97     }
98   if (r_addr)
99     {
100       r_addr->as_u32 = kv->key[0] >> 32;
101     }
102   if (r_port)
103     {
104       *r_port = kv->key[1] >> 48;
105     }
106   if (l_port)
107     {
108       *l_port = (kv->key[1] >> 32) & (u16) ~0;
109     }
110   if (fib_index)
111     {
112       *fib_index = (kv->key[1] >> 8) & ((1 << 24) - 1);
113     }
114   if (proto)
115     {
116       *proto = kv->key[1] & (u8) ~0;
117     }
118 }
119
120 static_always_inline int
121 nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
122                                     ip4_address_t *lookup_saddr,
123                                     u16 *lookup_sport,
124                                     ip4_address_t *lookup_daddr,
125                                     u16 *lookup_dport, u8 *lookup_protocol)
126 {
127   icmp46_header_t *icmp0;
128   nat_icmp_echo_header_t *echo0, *inner_echo0 = 0;
129   ip4_header_t *inner_ip0 = 0;
130   void *l4_header = 0;
131   icmp46_header_t *inner_icmp0;
132
133   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
134   echo0 = (nat_icmp_echo_header_t *) (icmp0 + 1);
135
136   // avoid warning about unused variables in caller by setting to bogus values
137   *lookup_sport = 0;
138   *lookup_dport = 0;
139
140   if (!icmp_type_is_error_message (
141         vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
142     {
143       *lookup_protocol = IP_PROTOCOL_ICMP;
144       lookup_saddr->as_u32 = ip0->src_address.as_u32;
145       *lookup_sport = vnet_buffer (b)->ip.reass.l4_src_port;
146       lookup_daddr->as_u32 = ip0->dst_address.as_u32;
147       *lookup_dport = vnet_buffer (b)->ip.reass.l4_dst_port;
148     }
149   else
150     {
151       inner_ip0 = (ip4_header_t *) (echo0 + 1);
152       l4_header = ip4_next_header (inner_ip0);
153       *lookup_protocol = inner_ip0->protocol;
154       lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32;
155       lookup_daddr->as_u32 = inner_ip0->src_address.as_u32;
156       switch (inner_ip0->protocol)
157         {
158         case IP_PROTOCOL_ICMP:
159           inner_icmp0 = (icmp46_header_t *) l4_header;
160           inner_echo0 = (nat_icmp_echo_header_t *) (inner_icmp0 + 1);
161           *lookup_sport = inner_echo0->identifier;
162           *lookup_dport = inner_echo0->identifier;
163           break;
164         case IP_PROTOCOL_UDP:
165         case IP_PROTOCOL_TCP:
166           *lookup_sport = ((nat_tcp_udp_header_t *) l4_header)->dst_port;
167           *lookup_dport = ((nat_tcp_udp_header_t *) l4_header)->src_port;
168           break;
169         default:
170           return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
171         }
172     }
173   return 0;
174 }
175
176 always_inline int
177 nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
178 {
179   return state == NAT44_ED_TCP_STATE_ESTABLISHED ? 1 : 0;
180 }
181
182 always_inline u32
183 nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
184 {
185   switch (s->proto)
186     {
187     case IP_PROTOCOL_ICMP:
188       /* fallthrough */
189     case IP_PROTOCOL_ICMP6:
190       return sm->timeouts.icmp;
191     case IP_PROTOCOL_UDP:
192       return sm->timeouts.udp;
193     case IP_PROTOCOL_TCP:
194       {
195         if (nat44_ed_tcp_is_established (s->tcp_state))
196           return sm->timeouts.tcp.established;
197         else
198           return sm->timeouts.tcp.transitory;
199       }
200     default:
201       return sm->timeouts.udp;
202     }
203
204   return 0;
205 }
206
207 static_always_inline u8
208 nat44_ed_maximum_sessions_exceeded (snat_main_t *sm, u32 fib_index,
209                                     u32 thread_index)
210 {
211   u32 translations;
212   translations = pool_elts (sm->per_thread_data[thread_index].sessions);
213   if (vec_len (sm->max_translations_per_fib) <= fib_index)
214     fib_index = 0;
215   return translations >= sm->max_translations_per_fib[fib_index];
216 }
217
218 static_always_inline int
219 nat_ed_lru_insert (snat_main_per_thread_data_t *tsm, snat_session_t *s,
220                    f64 now, u8 proto)
221 {
222   dlist_elt_t *lru_list_elt;
223   pool_get (tsm->lru_pool, lru_list_elt);
224   s->lru_index = lru_list_elt - tsm->lru_pool;
225   switch (proto)
226     {
227     case IP_PROTOCOL_UDP:
228       s->lru_head_index = tsm->udp_lru_head_index;
229       break;
230     case IP_PROTOCOL_TCP:
231       s->lru_head_index = tsm->tcp_trans_lru_head_index;
232       break;
233     case IP_PROTOCOL_ICMP:
234       s->lru_head_index = tsm->icmp_lru_head_index;
235       break;
236     default:
237       s->lru_head_index = tsm->unk_proto_lru_head_index;
238       break;
239     }
240   clib_dlist_addtail (tsm->lru_pool, s->lru_head_index, s->lru_index);
241   lru_list_elt->value = s - tsm->sessions;
242   s->last_lru_update = now;
243   return 1;
244 }
245
246 static_always_inline void
247 nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f)
248 {
249   init_ed_k (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
250              f->match.dport, f->match.fib_index, f->match.proto);
251 }
252
253 static_always_inline void
254 nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f,
255                       u32 thread_idx, u32 session_idx)
256 {
257   init_ed_kv (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
258               f->match.dport, f->match.fib_index, f->match.proto, thread_idx,
259               session_idx);
260 }
261
262 static_always_inline int
263 nat_ed_ses_i2o_flow_hash_add_del (snat_main_t *sm, u32 thread_idx,
264                                   snat_session_t *s, int is_add)
265 {
266   snat_main_per_thread_data_t *tsm =
267     vec_elt_at_index (sm->per_thread_data, thread_idx);
268   clib_bihash_kv_16_8_t kv;
269   if (0 == is_add)
270     {
271       nat_6t_flow_to_ed_k (&kv, &s->i2o);
272     }
273   else
274     {
275       nat_6t_flow_to_ed_kv (&kv, &s->i2o, thread_idx, s - tsm->sessions);
276       nat_6t_l3_l4_csum_calc (&s->i2o);
277     }
278
279   ASSERT (thread_idx == s->thread_index);
280   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add);
281 }
282
283 static_always_inline int
284 nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx,
285                                   snat_session_t *s, int is_add)
286 {
287   snat_main_per_thread_data_t *tsm =
288     vec_elt_at_index (sm->per_thread_data, thread_idx);
289   clib_bihash_kv_16_8_t kv;
290   if (0 == is_add)
291     {
292       nat_6t_flow_to_ed_k (&kv, &s->o2i);
293     }
294   else
295     {
296       nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions);
297       if (!(s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
298         {
299           if (nat44_ed_sm_o2i_lookup (sm, s->o2i.match.daddr,
300                                       s->o2i.match.dport, 0,
301                                       s->o2i.match.proto))
302             {
303               return -1;
304             }
305         }
306       nat_6t_l3_l4_csum_calc (&s->o2i);
307     }
308   ASSERT (thread_idx == s->thread_index);
309   return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add);
310 }
311
312 always_inline void
313 nat_ed_session_delete (snat_main_t *sm, snat_session_t *ses, u32 thread_index,
314                        int lru_delete
315                        /* delete from global LRU list */)
316 {
317   snat_main_per_thread_data_t *tsm =
318     vec_elt_at_index (sm->per_thread_data, thread_index);
319
320   if (lru_delete)
321     {
322       clib_dlist_remove (tsm->lru_pool, ses->lru_index);
323     }
324   pool_put_index (tsm->lru_pool, ses->lru_index);
325   if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, ses, 0))
326     nat_elog_warn (sm, "flow hash del failed");
327   if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, ses, 0))
328     nat_elog_warn (sm, "flow hash del failed");
329   pool_put (tsm->sessions, ses);
330   vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
331                            pool_elts (tsm->sessions));
332 }
333
334 static_always_inline int
335 nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now,
336                             u32 head_index)
337 {
338   snat_session_t *s = NULL;
339   dlist_elt_t *oldest_elt;
340   f64 sess_timeout_time;
341   u32 oldest_index;
342   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
343   oldest_index = clib_dlist_remove_head (tsm->lru_pool, head_index);
344   if (~0 != oldest_index)
345     {
346       oldest_elt = pool_elt_at_index (tsm->lru_pool, oldest_index);
347       s = pool_elt_at_index (tsm->sessions, oldest_elt->value);
348
349       sess_timeout_time =
350         s->last_heard + (f64) nat44_session_get_timeout (sm, s);
351       if (now >= sess_timeout_time)
352         {
353           nat44_ed_free_session_data (sm, s, thread_index, 0);
354           nat_ed_session_delete (sm, s, thread_index, 0);
355           return 1;
356         }
357       else
358         {
359           clib_dlist_addhead (tsm->lru_pool, head_index, oldest_index);
360         }
361     }
362   return 0;
363 }
364
365 static_always_inline int
366 nat_lru_free_one (snat_main_t *sm, int thread_index, f64 now)
367 {
368   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
369   int rc = 0;
370 #define _(p)                                                                  \
371   if ((rc = nat_lru_free_one_with_head (sm, thread_index, now,                \
372                                         tsm->p##_lru_head_index)))            \
373     {                                                                         \
374       return rc;                                                              \
375     }
376   _ (tcp_trans);
377   _ (udp);
378   _ (unk_proto);
379   _ (icmp);
380   _ (tcp_estab);
381 #undef _
382   return 0;
383 }
384
385 static_always_inline snat_session_t *
386 nat_ed_session_alloc (snat_main_t *sm, u32 thread_index, f64 now, u8 proto)
387 {
388   snat_session_t *s;
389   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
390
391   nat_lru_free_one (sm, thread_index, now);
392
393   pool_get (tsm->sessions, s);
394   clib_memset (s, 0, sizeof (*s));
395
396   nat_ed_lru_insert (tsm, s, now, proto);
397
398   s->ha_last_refreshed = now;
399   vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
400                            pool_elts (tsm->sessions));
401 #if CLIB_ASSERT_ENABLE
402   s->thread_index = thread_index;
403 #endif
404   return s;
405 }
406
407 // slow path
408 static_always_inline void
409 per_vrf_sessions_cleanup (u32 thread_index)
410 {
411   snat_main_t *sm = &snat_main;
412   snat_main_per_thread_data_t *tsm =
413     vec_elt_at_index (sm->per_thread_data, thread_index);
414   per_vrf_sessions_t *per_vrf_sessions;
415   u32 *to_free = 0, *i;
416
417   vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
418     {
419       if (per_vrf_sessions->expired)
420         {
421           if (per_vrf_sessions->ses_count == 0)
422             {
423               vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_vec);
424             }
425         }
426     }
427
428   if (vec_len (to_free))
429     {
430       vec_foreach (i, to_free)
431         {
432           vec_del1 (tsm->per_vrf_sessions_vec, *i);
433         }
434     }
435
436   vec_free (to_free);
437 }
438
439 // slow path
440 static_always_inline void
441 per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index)
442 {
443   snat_main_t *sm = &snat_main;
444   snat_main_per_thread_data_t *tsm =
445     vec_elt_at_index (sm->per_thread_data, thread_index);
446   per_vrf_sessions_t *per_vrf_sessions;
447
448   per_vrf_sessions_cleanup (thread_index);
449
450   // s->per_vrf_sessions_index == ~0 ... reuse of old session
451
452   vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
453     {
454       // ignore already expired registrations
455       if (per_vrf_sessions->expired)
456         continue;
457
458       if ((s->in2out.fib_index == per_vrf_sessions->rx_fib_index) &&
459           (s->out2in.fib_index == per_vrf_sessions->tx_fib_index))
460         {
461           goto done;
462         }
463       if ((s->in2out.fib_index == per_vrf_sessions->tx_fib_index) &&
464           (s->out2in.fib_index == per_vrf_sessions->rx_fib_index))
465         {
466           goto done;
467         }
468     }
469
470   // create a new registration
471   vec_add2 (tsm->per_vrf_sessions_vec, per_vrf_sessions, 1);
472   clib_memset (per_vrf_sessions, 0, sizeof (*per_vrf_sessions));
473
474   per_vrf_sessions->rx_fib_index = s->in2out.fib_index;
475   per_vrf_sessions->tx_fib_index = s->out2in.fib_index;
476
477 done:
478   s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_vec;
479   per_vrf_sessions->ses_count++;
480 }
481
482 // fast path
483 static_always_inline void
484 per_vrf_sessions_unregister_session (snat_session_t *s, u32 thread_index)
485 {
486   snat_main_t *sm = &snat_main;
487   snat_main_per_thread_data_t *tsm;
488   per_vrf_sessions_t *per_vrf_sessions;
489
490   ASSERT (s->per_vrf_sessions_index != ~0);
491
492   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
493   per_vrf_sessions =
494     vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
495
496   ASSERT (per_vrf_sessions->ses_count != 0);
497
498   per_vrf_sessions->ses_count--;
499   s->per_vrf_sessions_index = ~0;
500 }
501
502 // fast path
503 static_always_inline u8
504 per_vrf_sessions_is_expired (snat_session_t *s, u32 thread_index)
505 {
506   snat_main_t *sm = &snat_main;
507   snat_main_per_thread_data_t *tsm;
508   per_vrf_sessions_t *per_vrf_sessions;
509
510   ASSERT (s->per_vrf_sessions_index != ~0);
511
512   tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
513   per_vrf_sessions =
514     vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
515   return per_vrf_sessions->expired;
516 }
517
518 static_always_inline void
519 nat_6t_flow_init (nat_6t_flow_t *f, u32 thread_idx, ip4_address_t saddr,
520                   u16 sport, ip4_address_t daddr, u16 dport, u32 fib_index,
521                   u8 proto, u32 session_idx)
522 {
523   clib_memset (f, 0, sizeof (*f));
524   f->match.saddr = saddr;
525   f->match.sport = sport;
526   f->match.daddr = daddr;
527   f->match.dport = dport;
528   f->match.proto = proto;
529   f->match.fib_index = fib_index;
530 }
531
532 static_always_inline void
533 nat_6t_i2o_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s,
534                       ip4_address_t saddr, u16 sport, ip4_address_t daddr,
535                       u16 dport, u32 fib_index, u8 proto)
536 {
537   snat_main_per_thread_data_t *tsm =
538     vec_elt_at_index (sm->per_thread_data, thread_idx);
539   nat_6t_flow_init (&s->i2o, thread_idx, saddr, sport, daddr, dport, fib_index,
540                     proto, s - tsm->sessions);
541 }
542
543 static_always_inline void
544 nat_6t_o2i_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s,
545                       ip4_address_t saddr, u16 sport, ip4_address_t daddr,
546                       u16 dport, u32 fib_index, u8 proto)
547 {
548   snat_main_per_thread_data_t *tsm =
549     vec_elt_at_index (sm->per_thread_data, thread_idx);
550   nat_6t_flow_init (&s->o2i, thread_idx, saddr, sport, daddr, dport, fib_index,
551                     proto, s - tsm->sessions);
552 }
553
554 static_always_inline int
555 nat_6t_t_eq (nat_6t_t *t1, nat_6t_t *t2)
556 {
557   return t1->as_u64[0] == t2->as_u64[0] && t1->as_u64[1] == t2->as_u64[1];
558 }
559
560 static inline uword
561 nat_pre_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
562                         vlib_frame_t *frame, u32 def_next)
563 {
564   u32 n_left_from, *from;
565
566   from = vlib_frame_vector_args (frame);
567   n_left_from = frame->n_vectors;
568
569   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
570   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
571   vlib_get_buffers (vm, from, b, n_left_from);
572
573   while (n_left_from >= 2)
574     {
575       u32 next0, next1;
576       u32 arc_next0, arc_next1;
577       vlib_buffer_t *b0, *b1;
578
579       b0 = *b;
580       b++;
581       b1 = *b;
582       b++;
583
584       /* Prefetch next iteration. */
585       if (PREDICT_TRUE (n_left_from >= 4))
586         {
587           vlib_buffer_t *p2, *p3;
588
589           p2 = *b;
590           p3 = *(b + 1);
591
592           vlib_prefetch_buffer_header (p2, LOAD);
593           vlib_prefetch_buffer_header (p3, LOAD);
594
595           clib_prefetch_load (p2->data);
596           clib_prefetch_load (p3->data);
597         }
598
599       next0 = def_next;
600       next1 = def_next;
601
602       vnet_feature_next (&arc_next0, b0);
603       vnet_feature_next (&arc_next1, b1);
604
605       vnet_buffer2 (b0)->nat.arc_next = arc_next0;
606       vnet_buffer2 (b1)->nat.arc_next = arc_next1;
607
608       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
609         {
610           if (b0->flags & VLIB_BUFFER_IS_TRACED)
611             {
612               nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
613               t->next_index = next0;
614               t->arc_next_index = arc_next0;
615             }
616           if (b1->flags & VLIB_BUFFER_IS_TRACED)
617             {
618               nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
619               t->next_index = next1;
620               t->arc_next_index = arc_next1;
621             }
622         }
623
624       n_left_from -= 2;
625       next[0] = next0;
626       next[1] = next1;
627       next += 2;
628     }
629
630   while (n_left_from > 0)
631     {
632       u32 next0;
633       u32 arc_next0;
634       vlib_buffer_t *b0;
635
636       b0 = *b;
637       b++;
638
639       next0 = def_next;
640       vnet_feature_next (&arc_next0, b0);
641       vnet_buffer2 (b0)->nat.arc_next = arc_next0;
642
643       if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
644                          (b0->flags & VLIB_BUFFER_IS_TRACED)))
645         {
646           nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
647           t->next_index = next0;
648           t->arc_next_index = arc_next0;
649         }
650
651       n_left_from--;
652       next[0] = next0;
653       next++;
654     }
655   vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
656                                frame->n_vectors);
657
658   return frame->n_vectors;
659 }
660
661 static_always_inline u16
662 snat_random_port (u16 min, u16 max)
663 {
664   snat_main_t *sm = &snat_main;
665   u32 rwide;
666   u16 r;
667
668   rwide = random_u32 (&sm->random_seed);
669   r = rwide & 0xFFFF;
670   if (r >= min && r <= max)
671     return r;
672
673   return min + (rwide % (max - min + 1));
674 }
675
676 always_inline u8
677 is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node,
678                    u32 sw_if_index0, u32 ip4_addr)
679 {
680   snat_runtime_t *rt = (snat_runtime_t *) node->runtime_data;
681   u8 ip4_addr_exists;
682
683   if (PREDICT_FALSE (rt->cached_sw_if_index != sw_if_index0))
684     {
685       ip_lookup_main_t *lm = &sm->ip4_main->lookup_main;
686       ip_interface_address_t *ia;
687       ip4_address_t *a;
688
689       rt->cached_sw_if_index = ~0;
690       hash_free (rt->cached_presence_by_ip4_address);
691
692       foreach_ip_interface_address (
693         lm, ia, sw_if_index0, 1 /* honor unnumbered */, ({
694           a = ip_interface_address_get_address (lm, ia);
695           hash_set (rt->cached_presence_by_ip4_address, a->as_u32, 1);
696           rt->cached_sw_if_index = sw_if_index0;
697         }));
698
699       if (rt->cached_sw_if_index == ~0)
700         return 0;
701     }
702
703   ip4_addr_exists = !!hash_get (rt->cached_presence_by_ip4_address, ip4_addr);
704   if (PREDICT_FALSE (ip4_addr_exists))
705     return 1;
706   else
707     return 0;
708 }
709
710 always_inline void
711 nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
712 {
713   nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
714                          s->in2out.port, &s->ext_host_nat_addr,
715                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
716                          &s->ext_host_addr, s->ext_host_port, s->proto,
717                          nat44_ed_is_twice_nat_session (s));
718
719   nat_ipfix_logging_nat44_ses_delete (
720     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
721     s->in2out.port, s->out2in.port, s->in2out.fib_index);
722   nat_ipfix_logging_nat44_ses_create (
723     thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
724     s->in2out.port, s->out2in.port, s->in2out.fib_index);
725
726   nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
727                          s->in2out.port, &s->ext_host_nat_addr,
728                          s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
729                          &s->ext_host_addr, s->ext_host_port, s->proto, 0);
730   s->total_pkts = 0;
731   s->total_bytes = 0;
732 }
733
/*
 * "Some rise by SYN, and some by virtue FIN" - William Shakespeare
 * TCP state tracking is patterned after RFC 7857 (and RFC 6146, which is
 * referenced by RFC 7857). In contrast to the state machine in RFC 7857, we
 * only transition to the ESTABLISHED state after seeing a full 3-way
 * handshake (SYNs and ACKs in both directions). RFC 7857, as a means of
 * protecting against spurious RSTs closing a session, goes back to
 * ESTABLISHED if a data packet is received after the RST. This state machine
 * instead leaves the session in the transitory state once an RST is seen.
 * Our implementation also goes beyond the RFC by supporting creation of a
 * new session while the old session is in transitory timeout after seeing
 * FIN packets from both sides.
 */
746 always_inline void
747 nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
748                              u8 tcp_flags, u32 thread_index,
749                              nat44_ed_dir_e dir)
750 {
751   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
752   u8 old_flags = ses->tcp_flags[dir];
753   ses->tcp_flags[dir] |=
754     tcp_flags & (TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK);
755   if (old_flags == ses->tcp_flags[dir])
756     return;
757
758   u8 old_state = ses->tcp_state;
759
760   switch (old_state)
761     {
762     case NAT44_ED_TCP_STATE_CLOSED:
763       // ESTABLISHED when a SYN and ACK is seen from both sides
764       if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
765            ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
766         {
767           ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
768           ses->lru_head_index = tsm->tcp_estab_lru_head_index;
769         }
770       break;
771     case NAT44_ED_TCP_STATE_ESTABLISHED:
772       // CLOSING when a FIN is seen from either side or session has been RST
773       if ((ses->tcp_flags[dir] & TCP_FLAG_FIN) ||
774           (ses->tcp_flags[dir] & TCP_FLAG_RST))
775         {
776           ses->tcp_state = NAT44_ED_TCP_STATE_CLOSING;
777           ses->tcp_flags[NAT44_ED_DIR_I2O] = 0;
778           ses->tcp_flags[NAT44_ED_DIR_O2I] = 0;
779           // need to update last heard otherwise session might get
780           // immediately timed out if it has been idle longer than
781           // transitory timeout
782           ses->last_heard = now;
783           ses->lru_head_index = tsm->tcp_trans_lru_head_index;
784         }
785       break;
786     case NAT44_ED_TCP_STATE_CLOSING:
787       // Allow a transitory session to reopen
788       if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
789            ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
790         {
791           nat44_ed_session_reopen (thread_index, ses);
792           ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
793           ses->lru_head_index = tsm->tcp_estab_lru_head_index;
794         }
795       break;
796     }
797   if (old_state == ses->tcp_state)
798     return;
799   ses->last_lru_update = now;
800   clib_dlist_remove (tsm->lru_pool, ses->lru_index);
801   clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
802 }
803
804 always_inline void
805 nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
806                                  u8 tcp_flags, u32 thread_index)
807 {
808   return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
809                                       NAT44_ED_DIR_I2O);
810 }
811
812 always_inline void
813 nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
814                                  u8 tcp_flags, u32 thread_index)
815 {
816   return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
817                                       NAT44_ED_DIR_O2I);
818 }
819
820 always_inline void
821 nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
822                                u32 thread_index)
823 {
824   // regardless of TCP state, reset the timer if data packet is seen.
825   s->last_heard = now;
826   s->total_pkts++;
827   s->total_bytes += bytes;
828 }
829
830 /** \brief Per-user LRU list maintenance */
831 always_inline void
832 nat44_session_update_lru (snat_main_t *sm, snat_session_t *s, u32 thread_index)
833 {
834   /* don't update too often - timeout is in magnitude of seconds anyway */
835   if (s->last_heard > s->last_lru_update + 1)
836     {
837       clib_dlist_remove (sm->per_thread_data[thread_index].lru_pool,
838                          s->lru_index);
839       clib_dlist_addtail (sm->per_thread_data[thread_index].lru_pool,
840                           s->lru_head_index, s->lru_index);
841       s->last_lru_update = s->last_heard;
842     }
843 }
844
845 static_always_inline int
846 nat44_ed_is_unk_proto (u8 proto)
847 {
848   static const int lookup_table[256] = {
849     [IP_PROTOCOL_TCP] = 1,
850     [IP_PROTOCOL_UDP] = 1,
851     [IP_PROTOCOL_ICMP] = 1,
852     [IP_PROTOCOL_ICMP6] = 1,
853   };
854
855   return 1 - lookup_table[proto];
856 }
857
858 #endif /* __included_nat44_ed_inlines_h__ */
859
860 /*
861  * fd.io coding-style-patch-verification: ON
862  *
863  * Local Variables:
864  * eval: (c-set-style "gnu")
865  * End:
866  */