nat: fix forwarding handoff workaround
[vpp.git] / src / plugins / nat / out2in_ed.c
index e9fbc59..95e2ea9 100644 (file)
 #include <vnet/ip/ip.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/fib/ip4_fib.h>
-#include <vnet/udp/udp.h>
+#include <vnet/udp/udp_local.h>
 #include <vppinfra/error.h>
 #include <nat/nat.h>
 #include <nat/lib/ipfix_logging.h>
 #include <nat/nat_inlines.h>
 #include <nat/nat44/inlines.h>
-#include <nat/nat_syslog.h>
+#include <nat/lib/nat_syslog.h>
 #include <nat/nat_ha.h>
 #include <nat/nat44/ed_inlines.h>
 
@@ -46,6 +46,11 @@ typedef struct
   u32 is_slow_path;
 } nat44_ed_out2in_trace_t;
 
+typedef struct
+{
+  u16 thread_next;
+} nat44_ed_out2in_handoff_trace_t;
+
 static u8 *
 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
 {
@@ -502,10 +507,9 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
   if (ip->protocol == IP_PROTOCOL_TCP)
     {
       tcp_header_t *tcp = ip4_next_header (ip);
-      if (nat44_set_tcp_session_state_o2i
-         (sm, now, s, tcp->flags, tcp->ack_number, tcp->seq_number,
-          thread_index))
-       return;
+      nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
+                                      tcp->ack_number, tcp->seq_number,
+                                      thread_index);
     }
 
   /* Accounting */
@@ -514,16 +518,24 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip,
   nat44_session_update_lru (sm, s, thread_index);
 }
 
-static inline void
-create_bypass_for_fwd_worker (snat_main_t * sm, vlib_buffer_t * b,
-                             ip4_header_t * ip, u32 rx_fib_index)
+static_always_inline int
+create_bypass_for_fwd_worker (snat_main_t * sm,
+                             vlib_buffer_t * b, ip4_header_t * ip,
+                             u32 rx_fib_index, u32 thread_index)
 {
-  ip4_header_t ip_wkr = {
+  ip4_header_t tmp = {
     .src_address = ip->dst_address,
   };
-  u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0);
+  u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0);
+
+  if (index != thread_index)
+    {
+      vnet_buffer2 (b)->nat.thread_next = index;
+      return 1;
+    }
 
   create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
+  return 0;
 }
 
 #ifndef CLIB_MARCH_VARIANT
@@ -591,10 +603,19 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
              else
                {
                  if (sm->num_workers > 1)
-                   create_bypass_for_fwd_worker (sm, b, ip, rx_fib_index);
+                   {
+                     if (create_bypass_for_fwd_worker (sm, b, ip,
+                                                       rx_fib_index,
+                                                       thread_index))
+                       {
+                         next = NAT_NEXT_OUT2IN_ED_HANDOFF;
+                       }
+                   }
                  else
-                   create_bypass_for_fwd (sm, b, ip, rx_fib_index,
-                                          thread_index);
+                   {
+                     create_bypass_for_fwd (sm, b, ip, rx_fib_index,
+                                            thread_index);
+                   }
                }
            }
          goto out;
@@ -969,11 +990,14 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
            }
          vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp,
                                         thread_index, sw_if_index0, 1);
-         if (nat44_set_tcp_session_state_o2i
-             (sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
-              vnet_buffer (b0)->ip.reass.tcp_ack_number,
-              vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
-           goto trace0;
+         nat44_set_tcp_session_state_o2i (sm, now, s0,
+                                          vnet_buffer (b0)->ip.
+                                          reass.icmp_type_or_tcp_flags,
+                                          vnet_buffer (b0)->ip.
+                                          reass.tcp_ack_number,
+                                          vnet_buffer (b0)->ip.
+                                          reass.tcp_seq_number,
+                                          thread_index);
        }
       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
               && udp0->checksum)
@@ -1206,12 +1230,18 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
                    }
                  else
                    {
-                     if (sm->num_workers > 1)
-                       create_bypass_for_fwd_worker (sm, b0, ip0,
-                                                     rx_fib_index0);
+                     if ((sm->num_workers > 1)
+                         && create_bypass_for_fwd_worker (sm, b0, ip0,
+                                                          rx_fib_index0,
+                                                          thread_index))
+                       {
+                         next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF;
+                       }
                      else
-                       create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
-                                              thread_index);
+                       {
+                         create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
+                                                thread_index);
+                       }
                    }
                }
              goto trace0;
@@ -1291,11 +1321,14 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
            }
          vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp,
                                         thread_index, sw_if_index0, 1);
-         if (nat44_set_tcp_session_state_o2i
-             (sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
-              vnet_buffer (b0)->ip.reass.tcp_ack_number,
-              vnet_buffer (b0)->ip.reass.tcp_seq_number, thread_index))
-           goto trace0;
+         nat44_set_tcp_session_state_o2i (sm, now, s0,
+                                          vnet_buffer (b0)->ip.
+                                          reass.icmp_type_or_tcp_flags,
+                                          vnet_buffer (b0)->ip.
+                                          reass.tcp_ack_number,
+                                          vnet_buffer (b0)->ip.
+                                          reass.tcp_seq_number,
+                                          thread_index);
        }
       else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment
               && udp0->checksum)
@@ -1378,6 +1411,88 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+static inline uword
+nat_handoff_node_fn_inline (vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame, u32 fq_index)
+{
+  u32 n_enq, n_left_from, *from;
+
+  u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  vlib_get_buffers (vm, from, b, n_left_from);
+
+  while (n_left_from >= 4)
+    {
+      if (PREDICT_TRUE (n_left_from >= 8))
+       {
+         vlib_prefetch_buffer_header (b[4], LOAD);
+         vlib_prefetch_buffer_header (b[5], LOAD);
+         vlib_prefetch_buffer_header (b[6], LOAD);
+         vlib_prefetch_buffer_header (b[7], LOAD);
+         CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       }
+
+      ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
+      ti[1] = vnet_buffer2 (b[1])->nat.thread_next;
+      ti[2] = vnet_buffer2 (b[2])->nat.thread_next;
+      ti[3] = vnet_buffer2 (b[3])->nat.thread_next;
+
+      b += 4;
+      ti += 4;
+      n_left_from -= 4;
+    }
+
+  while (n_left_from > 0)
+    {
+      ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
+
+      b += 1;
+      ti += 1;
+      n_left_from -= 1;
+    }
+
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      u32 i;
+      b = bufs;
+      ti = thread_indices;
+
+      for (i = 0; i < frame->n_vectors; i++)
+       {
+         if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+           {
+             nat44_ed_out2in_handoff_trace_t *t =
+               vlib_add_trace (vm, node, b[0], sizeof (*t));
+             t->thread_next = ti[0];
+             b += 1;
+             ti += 1;
+           }
+         else
+           break;
+       }
+    }
+
+  n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
+                                        frame->n_vectors, 1);
+
+  if (n_enq < frame->n_vectors)
+    {
+      vlib_node_increment_counter (vm, node->node_index,
+                                  NAT44_HANDOFF_ERROR_CONGESTION_DROP,
+                                  frame->n_vectors - n_enq);
+    }
+
+  return frame->n_vectors;
+}
+
 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
                                     vlib_node_runtime_t * node,
                                     vlib_frame_t * frame)
@@ -1425,6 +1540,35 @@ VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = {
 };
 /* *INDENT-ON* */
 
+static u8 *
+format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat44_ed_out2in_handoff_trace_t *t =
+    va_arg (*args, nat44_ed_out2in_handoff_trace_t *);
+  return format (s, "out2in ed handoff thread_next index %d", t->thread_next);
+}
+
+VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm,
+                                            vlib_node_runtime_t * node,
+                                            vlib_frame_t * frame)
+{
+  return nat_handoff_node_fn_inline (vm, node, frame,
+                                    snat_main.ed_out2in_node_index);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = {
+  .name = "nat44-ed-out2in-handoff",
+  .vector_size = sizeof (u32),
+  .sibling_of = "nat-default",
+  .format_trace = format_nat44_ed_out2in_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = 0,
+};
+/* *INDENT-ON* */
+
 static u8 *
 format_nat_pre_trace (u8 * s, va_list * args)
 {