NAT: VPP-1537 IPFIX per worker processing
[vpp.git] / src / plugins / nat / nat44_classify.c
1 /*
2  * Copyright (c) 2018 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /**
16  * @file
17  * @brief Classify for one armed NAT44 (in+out interface)
18  */
19
20 #include <vlib/vlib.h>
21 #include <vnet/vnet.h>
22 #include <vnet/fib/ip4_fib.h>
23 #include <nat/nat.h>
24 #include <nat/nat_reass.h>
25 #include <nat/nat_inlines.h>
26
27 vlib_node_registration_t nat44_classify_node;
28 vlib_node_registration_t nat44_ed_classify_node;
29 vlib_node_registration_t nat44_det_classify_node;
30 vlib_node_registration_t nat44_handoff_classify_node;
31
/*
 * X-macro list of the per-node counters: _(symbol suffix, display string).
 * It is expanded once to build nat44_classify_error_t and once to build
 * nat44_classify_error_strings, so both always stay in the same order.
 * The first two entries are drop reasons; the last three are plain
 * statistics incremented at the end of each frame.
 */
#define foreach_nat44_classify_error                        \
  _(MAX_REASS, "Maximum reassemblies exceeded")             \
  _(MAX_FRAG, "Maximum fragments per reassembly exceeded")  \
  _(NEXT_IN2OUT, "next in2out")                             \
  _(NEXT_OUT2IN, "next out2in")                             \
  _(FRAG_CACHED, "fragment cached")
38
39 typedef enum
40 {
41 #define _(sym,str) NAT44_CLASSIFY_ERROR_##sym,
42   foreach_nat44_classify_error
43 #undef _
44     NAT44_CLASSIFY_N_ERROR,
45 } nat44_classify_error_t;
46
47 static char *nat44_classify_error_strings[] = {
48 #define _(sym,string) string,
49   foreach_nat44_classify_error
50 #undef _
51 };
52
/*
 * Next-node slots shared by all classify nodes.  Every registration maps
 * these slots onto its own in2out / out2in node names and "error-drop".
 */
typedef enum
{
  NAT44_CLASSIFY_NEXT_IN2OUT = 0,	/* default classification */
  NAT44_CLASSIFY_NEXT_OUT2IN,	/* destination matched a NAT address / mapping */
  NAT44_CLASSIFY_NEXT_DROP,	/* reassembly limits exceeded */
  NAT44_CLASSIFY_N_NEXT,	/* number of next slots */
} nat44_classify_next_t;
60
61 typedef struct
62 {
63   u8 next_in2out;
64   u8 cached;
65 } nat44_classify_trace_t;
66
67 static u8 *
68 format_nat44_classify_trace (u8 * s, va_list * args)
69 {
70   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
71   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
72   nat44_classify_trace_t *t = va_arg (*args, nat44_classify_trace_t *);
73   char *next;
74
75   if (t->cached)
76     s = format (s, "nat44-classify: fragment cached");
77   else
78     {
79       next = t->next_in2out ? "nat44-in2out" : "nat44-out2in";
80       s = format (s, "nat44-classify: next %s", next);
81     }
82
83   return s;
84 }
85
86 static inline uword
87 nat44_classify_node_fn_inline (vlib_main_t * vm,
88                                vlib_node_runtime_t * node,
89                                vlib_frame_t * frame, int is_ed)
90 {
91   u32 n_left_from, *from, *to_next;
92   nat44_classify_next_t next_index;
93   snat_main_t *sm = &snat_main;
94   snat_static_mapping_t *m;
95   u32 thread_index = vm->thread_index;
96   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
97   u32 *fragments_to_drop = 0;
98   u32 *fragments_to_loopback = 0;
99   u32 next_in2out = 0, next_out2in = 0, frag_cached = 0;
100
101   from = vlib_frame_vector_args (frame);
102   n_left_from = frame->n_vectors;
103   next_index = node->cached_next_index;
104
105   while (n_left_from > 0)
106     {
107       u32 n_left_to_next;
108
109       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
110
111       while (n_left_from > 0 && n_left_to_next > 0)
112         {
113           u32 bi0;
114           vlib_buffer_t *b0;
115           u32 next0 = NAT44_CLASSIFY_NEXT_IN2OUT, sw_if_index0, rx_fib_index0;
116           ip4_header_t *ip0;
117           snat_address_t *ap;
118           snat_session_key_t m_key0;
119           clib_bihash_kv_8_8_t kv0, value0;
120           clib_bihash_kv_16_8_t ed_kv0, ed_value0;
121           udp_header_t *udp0;
122           nat_reass_ip4_t *reass0;
123           u8 cached0 = 0;
124
125           /* speculatively enqueue b0 to the current next frame */
126           bi0 = from[0];
127           to_next[0] = bi0;
128           from += 1;
129           to_next += 1;
130           n_left_from -= 1;
131           n_left_to_next -= 1;
132
133           b0 = vlib_get_buffer (vm, bi0);
134           ip0 = vlib_buffer_get_current (b0);
135           udp0 = ip4_next_header (ip0);
136
137           if (is_ed && ip0->protocol != IP_PROTOCOL_ICMP)
138             {
139               if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0))
140                 {
141                   /* process leading fragment/whole packet (with L4 header) */
142                   sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
143                   rx_fib_index0 =
144                     fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
145                                                          sw_if_index0);
146                   make_ed_kv (&ed_kv0, &ip0->src_address, &ip0->dst_address,
147                               ip0->protocol, rx_fib_index0, udp0->src_port,
148                               udp0->dst_port);
149                   if (ip4_is_fragment (ip0))
150                     {
151                       reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
152                                                              ip0->dst_address,
153                                                              ip0->fragment_id,
154                                                              ip0->protocol,
155                                                              1,
156                                                              &fragments_to_drop);
157                       if (PREDICT_FALSE (!reass0))
158                         {
159                           next0 = NAT44_CLASSIFY_NEXT_DROP;
160                           b0->error =
161                             node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
162                           nat_log_notice ("maximum reassemblies exceeded");
163                           goto enqueue0;
164                         }
165                       if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &ed_kv0,
166                                                     &ed_value0))
167                         {
168                           /* session exists so classify as IN2OUT,
169                            * save this information for future fragments and set
170                            * past fragments to be looped over and reprocessed */
171                           reass0->sess_index = ed_value0.value;
172                           reass0->classify_next =
173                             NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT;
174                           nat_ip4_reass_get_frags (reass0,
175                                                    &fragments_to_loopback);
176                           goto enqueue0;
177                         }
178                       else
179                         {
180                           /* session doesn't exist so continue in the code,
181                            * save this information for future fragments and set
182                            * past fragments to be looped over and reprocessed */
183                           reass0->flags |=
184                             NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE;
185                           nat_ip4_reass_get_frags (reass0,
186                                                    &fragments_to_loopback);
187                         }
188                     }
189                   else
190                     {
191                       /* process whole packet */
192                       if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &ed_kv0,
193                                                     &ed_value0))
194                         goto enqueue0;
195                       /* session doesn't exist so continue in code */
196                     }
197                 }
198               else
199                 {
200                   /* process non-first fragment */
201                   reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
202                                                          ip0->dst_address,
203                                                          ip0->fragment_id,
204                                                          ip0->protocol,
205                                                          1,
206                                                          &fragments_to_drop);
207                   if (PREDICT_FALSE (!reass0))
208                     {
209                       next0 = NAT44_CLASSIFY_NEXT_DROP;
210                       b0->error =
211                         node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
212                       nat_log_notice ("maximum reassemblies exceeded");
213                       goto enqueue0;
214                     }
215                   /* check if first fragment has arrived */
216                   if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE &&
217                       !(reass0->flags & NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE))
218                     {
219                       /* first fragment still hasn't arrived, cache this fragment */
220                       if (nat_ip4_reass_add_fragment
221                           (thread_index, reass0, bi0, &fragments_to_drop))
222                         {
223                           b0->error =
224                             node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG];
225                           nat_log_notice
226                             ("maximum fragments per reassembly exceeded");
227                           next0 = NAT44_CLASSIFY_NEXT_DROP;
228                           goto enqueue0;
229                         }
230                       cached0 = 1;
231                       goto enqueue0;
232                     }
233                   if (reass0->classify_next ==
234                       NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT)
235                     goto enqueue0;
236                   /* flag NAT_REASS_FLAG_CLASSIFY_ED_CONTINUE is set
237                    * so keep the default next0 and continue in code to
238                    * potentially find other classification for this packet */
239                 }
240             }
241
242           /* *INDENT-OFF* */
243           vec_foreach (ap, sm->addresses)
244             {
245               if (ip0->dst_address.as_u32 == ap->addr.as_u32)
246                 {
247                   next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
248                   goto enqueue0;
249                 }
250             }
251           /* *INDENT-ON* */
252
253           if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
254             {
255               m_key0.addr = ip0->dst_address;
256               m_key0.port = 0;
257               m_key0.protocol = 0;
258               m_key0.fib_index = 0;
259               kv0.key = m_key0.as_u64;
260               /* try to classify the fragment based on IP header alone */
261               if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
262                                            &kv0, &value0))
263                 {
264                   m = pool_elt_at_index (sm->static_mappings, value0.value);
265                   if (m->local_addr.as_u32 != m->external_addr.as_u32)
266                     next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
267                   goto enqueue0;
268                 }
269               if (!ip4_is_fragment (ip0) || ip4_is_first_fragment (ip0))
270                 {
271                   /* process leading fragment/whole packet (with L4 header) */
272                   m_key0.port = clib_net_to_host_u16 (udp0->dst_port);
273                   m_key0.protocol = ip_proto_to_snat_proto (ip0->protocol);
274                   kv0.key = m_key0.as_u64;
275                   if (!clib_bihash_search_8_8
276                       (&sm->static_mapping_by_external, &kv0, &value0))
277                     {
278                       m =
279                         pool_elt_at_index (sm->static_mappings, value0.value);
280                       if (m->local_addr.as_u32 != m->external_addr.as_u32)
281                         next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
282                     }
283                   if (ip4_is_fragment (ip0))
284                     {
285                       reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
286                                                              ip0->dst_address,
287                                                              ip0->fragment_id,
288                                                              ip0->protocol,
289                                                              1,
290                                                              &fragments_to_drop);
291                       if (PREDICT_FALSE (!reass0))
292                         {
293                           next0 = NAT44_CLASSIFY_NEXT_DROP;
294                           b0->error =
295                             node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
296                           nat_log_notice ("maximum reassemblies exceeded");
297                           goto enqueue0;
298                         }
299                       /* save classification for future fragments and set past
300                        * fragments to be looped over and reprocessed */
301                       if (next0 == NAT44_CLASSIFY_NEXT_OUT2IN)
302                         reass0->classify_next =
303                           NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN;
304                       else
305                         reass0->classify_next =
306                           NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT;
307                       nat_ip4_reass_get_frags (reass0,
308                                                &fragments_to_loopback);
309                     }
310                 }
311               else
312                 {
313                   /* process non-first fragment */
314                   reass0 = nat_ip4_reass_find_or_create (ip0->src_address,
315                                                          ip0->dst_address,
316                                                          ip0->fragment_id,
317                                                          ip0->protocol,
318                                                          1,
319                                                          &fragments_to_drop);
320                   if (PREDICT_FALSE (!reass0))
321                     {
322                       next0 = NAT44_CLASSIFY_NEXT_DROP;
323                       b0->error =
324                         node->errors[NAT44_CLASSIFY_ERROR_MAX_REASS];
325                       nat_log_notice ("maximum reassemblies exceeded");
326                       goto enqueue0;
327                     }
328                   if (reass0->classify_next == NAT_REASS_IP4_CLASSIFY_NONE)
329                     /* first fragment still hasn't arrived */
330                     {
331                       if (nat_ip4_reass_add_fragment
332                           (thread_index, reass0, bi0, &fragments_to_drop))
333                         {
334                           b0->error =
335                             node->errors[NAT44_CLASSIFY_ERROR_MAX_FRAG];
336                           nat_log_notice
337                             ("maximum fragments per reassembly exceeded");
338                           next0 = NAT44_CLASSIFY_NEXT_DROP;
339                           goto enqueue0;
340                         }
341                       cached0 = 1;
342                       goto enqueue0;
343                     }
344                   else if (reass0->classify_next ==
345                            NAT_REASS_IP4_CLASSIFY_NEXT_OUT2IN)
346                     next0 = NAT44_CLASSIFY_NEXT_OUT2IN;
347                   else if (reass0->classify_next ==
348                            NAT_REASS_IP4_CLASSIFY_NEXT_IN2OUT)
349                     next0 = NAT44_CLASSIFY_NEXT_IN2OUT;
350                 }
351             }
352
353         enqueue0:
354           if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
355                              && (b0->flags & VLIB_BUFFER_IS_TRACED)))
356             {
357               nat44_classify_trace_t *t =
358                 vlib_add_trace (vm, node, b0, sizeof (*t));
359               t->cached = cached0;
360               if (!cached0)
361                 t->next_in2out = next0 == NAT44_CLASSIFY_NEXT_IN2OUT ? 1 : 0;
362             }
363
364           if (cached0)
365             {
366               n_left_to_next++;
367               to_next--;
368               frag_cached++;
369             }
370           else
371             {
372               next_in2out += next0 == NAT44_CLASSIFY_NEXT_IN2OUT;
373               next_out2in += next0 == NAT44_CLASSIFY_NEXT_OUT2IN;
374
375               /* verify speculative enqueue, maybe switch current next frame */
376               vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
377                                                to_next, n_left_to_next,
378                                                bi0, next0);
379             }
380
381           if (n_left_from == 0 && vec_len (fragments_to_loopback))
382             {
383               from = vlib_frame_vector_args (frame);
384               u32 len = vec_len (fragments_to_loopback);
385               if (len <= VLIB_FRAME_SIZE)
386                 {
387                   clib_memcpy_fast (from, fragments_to_loopback,
388                                     sizeof (u32) * len);
389                   n_left_from = len;
390                   vec_reset_length (fragments_to_loopback);
391                 }
392               else
393                 {
394                   clib_memcpy_fast (from, fragments_to_loopback +
395                                     (len - VLIB_FRAME_SIZE),
396                                     sizeof (u32) * VLIB_FRAME_SIZE);
397                   n_left_from = VLIB_FRAME_SIZE;
398                   _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
399                 }
400             }
401         }
402
403       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
404     }
405
406   nat_send_all_to_node (vm, fragments_to_drop, node, 0,
407                         NAT44_CLASSIFY_NEXT_DROP);
408
409   vec_free (fragments_to_drop);
410
411   vlib_node_increment_counter (vm, node->node_index,
412                                NAT44_CLASSIFY_ERROR_NEXT_IN2OUT, next_in2out);
413   vlib_node_increment_counter (vm, node->node_index,
414                                NAT44_CLASSIFY_ERROR_NEXT_OUT2IN, next_out2in);
415   vlib_node_increment_counter (vm, node->node_index,
416                                NAT44_CLASSIFY_ERROR_FRAG_CACHED, frag_cached);
417
418   return frame->n_vectors;
419 }
420
421 static uword
422 nat44_classify_node_fn (vlib_main_t * vm,
423                         vlib_node_runtime_t * node, vlib_frame_t * frame)
424 {
425   return nat44_classify_node_fn_inline (vm, node, frame, 0);
426 }
427
428 /* *INDENT-OFF* */
429 VLIB_REGISTER_NODE (nat44_classify_node) = {
430   .function = nat44_classify_node_fn,
431   .name = "nat44-classify",
432   .vector_size = sizeof (u32),
433   .format_trace = format_nat44_classify_trace,
434   .type = VLIB_NODE_TYPE_INTERNAL,
435   .n_errors = ARRAY_LEN(nat44_classify_error_strings),
436   .error_strings = nat44_classify_error_strings,
437   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
438   .next_nodes = {
439     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out",
440     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in",
441     [NAT44_CLASSIFY_NEXT_DROP] = "error-drop",
442   },
443 };
444 /* *INDENT-ON* */
445
446 VLIB_NODE_FUNCTION_MULTIARCH (nat44_classify_node, nat44_classify_node_fn);
447 static uword
448 nat44_ed_classify_node_fn (vlib_main_t * vm,
449                            vlib_node_runtime_t * node, vlib_frame_t * frame)
450 {
451   return nat44_classify_node_fn_inline (vm, node, frame, 1);
452 }
453
454 /* *INDENT-OFF* */
455 VLIB_REGISTER_NODE (nat44_ed_classify_node) = {
456   .function = nat44_ed_classify_node_fn,
457   .name = "nat44-ed-classify",
458   .vector_size = sizeof (u32),
459   .format_trace = format_nat44_classify_trace,
460   .type = VLIB_NODE_TYPE_INTERNAL,
461   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
462   .next_nodes = {
463     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-ed-in2out",
464     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-ed-out2in",
465     [NAT44_CLASSIFY_NEXT_DROP] = "error-drop",
466   },
467 };
468 /* *INDENT-ON* */
469
470 VLIB_NODE_FUNCTION_MULTIARCH (nat44_ed_classify_node,
471                               nat44_ed_classify_node_fn);
472
473 static uword
474 nat44_det_classify_node_fn (vlib_main_t * vm,
475                             vlib_node_runtime_t * node, vlib_frame_t * frame)
476 {
477   return nat44_classify_node_fn_inline (vm, node, frame, 0);
478 }
479
480 /* *INDENT-OFF* */
481 VLIB_REGISTER_NODE (nat44_det_classify_node) = {
482   .function = nat44_det_classify_node_fn,
483   .name = "nat44-det-classify",
484   .vector_size = sizeof (u32),
485   .format_trace = format_nat44_classify_trace,
486   .type = VLIB_NODE_TYPE_INTERNAL,
487   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
488   .next_nodes = {
489     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-det-in2out",
490     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-det-out2in",
491     [NAT44_CLASSIFY_NEXT_DROP] = "error-drop",
492   },
493 };
494 /* *INDENT-ON* */
495
496 VLIB_NODE_FUNCTION_MULTIARCH (nat44_det_classify_node,
497                               nat44_det_classify_node_fn);
498
499 static uword
500 nat44_handoff_classify_node_fn (vlib_main_t * vm,
501                                 vlib_node_runtime_t * node,
502                                 vlib_frame_t * frame)
503 {
504   return nat44_classify_node_fn_inline (vm, node, frame, 0);
505 }
506
507 /* *INDENT-OFF* */
508 VLIB_REGISTER_NODE (nat44_handoff_classify_node) = {
509   .function = nat44_handoff_classify_node_fn,
510   .name = "nat44-handoff-classify",
511   .vector_size = sizeof (u32),
512   .format_trace = format_nat44_classify_trace,
513   .type = VLIB_NODE_TYPE_INTERNAL,
514   .n_next_nodes = NAT44_CLASSIFY_N_NEXT,
515   .next_nodes = {
516     [NAT44_CLASSIFY_NEXT_IN2OUT] = "nat44-in2out-worker-handoff",
517     [NAT44_CLASSIFY_NEXT_OUT2IN] = "nat44-out2in-worker-handoff",
518     [NAT44_CLASSIFY_NEXT_DROP] = "error-drop",
519   },
520 };
521
522 VLIB_NODE_FUNCTION_MULTIARCH (nat44_handoff_classify_node,
523                               nat44_handoff_classify_node_fn);
524 /* *INDENT-ON* */
525
526 /*
527  * fd.io coding-style-patch-verification: ON
528  *
529  * Local Variables:
530  * eval: (c-set-style "gnu")
531  * End:
532  */