nat: deal with flows instead of sessions
[vpp.git] / src / plugins / nat / nat_inlines.h
index 0254de3..3408e53 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <vnet/fib/ip4_fib.h>
 #include <nat/nat.h>
-#include <nat/nat_ha.h>
+//#include <nat/nat44-ei/nat44_ei_ha.h>
 
 always_inline u64
 calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
@@ -29,7 +29,7 @@ calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
   ASSERT (fib_index <= (1 << 14) - 1);
   ASSERT (proto <= (1 << 3) - 1);
   return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 |
-    (proto & 0x7);
+        (proto & 0x7);
 }
 
 always_inline void
@@ -105,120 +105,99 @@ nat_pre_node_fn_inline (vlib_main_t * vm,
                        vlib_node_runtime_t * node,
                        vlib_frame_t * frame, u32 def_next)
 {
-  u32 n_left_from, *from, *to_next;
-  u16 next_index;
+  u32 n_left_from, *from;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
 
-  while (n_left_from > 0)
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+  vlib_get_buffers (vm, from, b, n_left_from);
+
+  while (n_left_from >= 2)
     {
-      u32 n_left_to_next;
+      u32 next0, next1;
+      u32 arc_next0, arc_next1;
+      vlib_buffer_t *b0, *b1;
 
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      b0 = *b;
+      b++;
+      b1 = *b;
+      b++;
 
-      while (n_left_from >= 4 && n_left_to_next >= 2)
+      /* Prefetch next iteration. */
+      if (PREDICT_TRUE (n_left_from >= 4))
        {
-         u32 next0, next1;
-         u32 arc_next0, arc_next1;
-         u32 bi0, bi1;
-         vlib_buffer_t *b0, *b1;
-
-         /* Prefetch next iteration. */
-         {
-           vlib_buffer_t *p2, *p3;
+         vlib_buffer_t *p2, *p3;
 
-           p2 = vlib_get_buffer (vm, from[2]);
-           p3 = vlib_get_buffer (vm, from[3]);
-
-           vlib_prefetch_buffer_header (p2, LOAD);
-           vlib_prefetch_buffer_header (p3, LOAD);
-
-           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
-           CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD);
-         }
+         p2 = *b;
+         p3 = *(b + 1);
 
-         /* speculatively enqueue b0 and b1 to the current next frame */
-         to_next[0] = bi0 = from[0];
-         to_next[1] = bi1 = from[1];
-         from += 2;
-         to_next += 2;
-         n_left_from -= 2;
-         n_left_to_next -= 2;
+         vlib_prefetch_buffer_header (p2, LOAD);
+         vlib_prefetch_buffer_header (p3, LOAD);
 
-         b0 = vlib_get_buffer (vm, bi0);
-         b1 = vlib_get_buffer (vm, bi1);
-
-         next0 = def_next;
-         next1 = def_next;
-
-         vnet_feature_next (&arc_next0, b0);
-         vnet_feature_next (&arc_next1, b1);
+         CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+         CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD);
+       }
 
-         vnet_buffer2 (b0)->nat.arc_next = arc_next0;
-         vnet_buffer2 (b1)->nat.arc_next = arc_next1;
+      next0 = def_next;
+      next1 = def_next;
 
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-           {
-             if (b0->flags & VLIB_BUFFER_IS_TRACED)
-               {
-                 nat_pre_trace_t *t =
-                   vlib_add_trace (vm, node, b0, sizeof (*t));
-                 t->next_index = next0;
-                 t->arc_next_index = arc_next0;
-               }
-             if (b1->flags & VLIB_BUFFER_IS_TRACED)
-               {
-                 nat_pre_trace_t *t =
-                   vlib_add_trace (vm, node, b0, sizeof (*t));
-                 t->next_index = next1;
-                 t->arc_next_index = arc_next1;
-               }
-           }
+      vnet_feature_next (&arc_next0, b0);
+      vnet_feature_next (&arc_next1, b1);
 
-         /* verify speculative enqueues, maybe switch current next frame */
-         vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, bi1, next0, next1);
-       }
+      vnet_buffer2 (b0)->nat.arc_next = arc_next0;
+      vnet_buffer2 (b1)->nat.arc_next = arc_next1;
 
-      while (n_left_from > 0 && n_left_to_next > 0)
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
        {
-         u32 next0;
-         u32 arc_next0;
-         u32 bi0;
-         vlib_buffer_t *b0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         next0 = def_next;
-         vnet_feature_next (&arc_next0, b0);
-         vnet_buffer2 (b0)->nat.arc_next = arc_next0;
-
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+         if (b0->flags & VLIB_BUFFER_IS_TRACED)
            {
              nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
              t->next_index = next0;
              t->arc_next_index = arc_next0;
            }
+         if (b1->flags & VLIB_BUFFER_IS_TRACED)
+           {                   /* record belongs to b1, the buffer tested above */
+             nat_pre_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
+             t->next_index = next1;
+             t->arc_next_index = arc_next1;
+           }
+       }
+
+      n_left_from -= 2;
+      next[0] = next0;
+      next[1] = next1;
+      next += 2;
+    }
+
+  while (n_left_from > 0)
+    {
+      u32 next0;
+      u32 arc_next0;
+      vlib_buffer_t *b0;
+
+      b0 = *b;
+      b++;
+
+      next0 = def_next;
+      vnet_feature_next (&arc_next0, b0);
+      vnet_buffer2 (b0)->nat.arc_next = arc_next0;
 
-         /* verify speculative enqueue, maybe switch current next frame */
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, next0);
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                        && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+       {
+         nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+         t->next_index = next0;
+         t->arc_next_index = arc_next0;
        }
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      n_left_from--;
+      next[0] = next0;
+      next++;
     }
+  vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
+                              frame->n_vectors);
 
   return frame->n_vectors;
 }
@@ -248,16 +227,6 @@ is_interface_addr (snat_main_t * sm, vlib_node_runtime_t * node,
     return 0;
 }
 
-always_inline u8
-maximum_sessions_exceeded (snat_main_t * sm, u32 thread_index)
-{
-  if (pool_elts (sm->per_thread_data[thread_index].sessions) >=
-      sm->max_translations_per_thread)
-    return 1;
-
-  return 0;
-}
-
 always_inline void
 user_session_increment (snat_main_t * sm, snat_user_t * u, u8 is_static)
 {
@@ -330,10 +299,7 @@ nat44_delete_session (snat_main_t * sm, snat_session_t * ses,
     }
 }
 
-/** \brief Set TCP session state.
-    @return 1 if session was closed, otherwise 0
-*/
-always_inline int
+always_inline void
 nat44_set_tcp_session_state_i2o (snat_main_t * sm, f64 now,
                                 snat_session_t * ses, vlib_buffer_t * b,
                                 u32 thread_index)
@@ -363,7 +329,7 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, f64 now,
          ses->state |= NAT44_SES_O2I_FIN_ACK;
          if (nat44_is_ses_closed (ses))
            {                   // if session is now closed, save the timestamp
-             ses->tcp_closed_timestamp = now + sm->tcp_transitory_timeout;
+             ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
              ses->last_lru_update = now;
            }
        }
@@ -380,10 +346,9 @@ nat44_set_tcp_session_state_i2o (snat_main_t * sm, f64 now,
     }
   clib_dlist_remove (tsm->lru_pool, ses->lru_index);
   clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
-  return 0;
 }
 
-always_inline int
+always_inline void
 nat44_set_tcp_session_state_o2i (snat_main_t * sm, f64 now,
                                 snat_session_t * ses, u8 tcp_flags,
                                 u32 tcp_ack_number, u32 tcp_seq_number,
@@ -410,7 +375,7 @@ nat44_set_tcp_session_state_o2i (snat_main_t * sm, f64 now,
        ses->state |= NAT44_SES_I2O_FIN_ACK;
       if (nat44_is_ses_closed (ses))
        {                       // if session is now closed, save the timestamp
-         ses->tcp_closed_timestamp = now + sm->tcp_transitory_timeout;
+         ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
          ses->last_lru_update = now;
        }
     }
@@ -425,7 +390,6 @@ nat44_set_tcp_session_state_o2i (snat_main_t * sm, f64 now,
     }
   clib_dlist_remove (tsm->lru_pool, ses->lru_index);
   clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
-  return 0;
 }
 
 always_inline u32
@@ -434,18 +398,18 @@ nat44_session_get_timeout (snat_main_t * sm, snat_session_t * s)
   switch (s->nat_proto)
     {
     case NAT_PROTOCOL_ICMP:
-      return sm->icmp_timeout;
+      return sm->timeouts.icmp;
     case NAT_PROTOCOL_UDP:
-      return sm->udp_timeout;
+      return sm->timeouts.udp;
     case NAT_PROTOCOL_TCP:
       {
        if (s->state)
-         return sm->tcp_transitory_timeout;
+         return sm->timeouts.tcp.transitory;
        else
-         return sm->tcp_established_timeout;
+         return sm->timeouts.tcp.established;
       }
     default:
-      return sm->udp_timeout;
+      return sm->timeouts.udp;
     }
 
   return 0;
@@ -458,10 +422,12 @@ nat44_session_update_counters (snat_session_t * s, f64 now, uword bytes,
   s->last_heard = now;
   s->total_pkts++;
   s->total_bytes += bytes;
+#if 0
   nat_ha_sref (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
               s->ext_host_port, s->nat_proto, s->out2in.fib_index,
               s->total_pkts, s->total_bytes, thread_index,
               &s->ha_last_refreshed, now);
+#endif
 }
 
 /** \brief Per-user LRU list maintenance */
@@ -520,20 +486,6 @@ ed_value_get_session_index (clib_bihash_kv_16_8_t * value)
   return value->value & ~(u32) 0;
 }
 
-always_inline void
-split_ed_value (clib_bihash_kv_16_8_t * value, u32 * thread_index,
-               u32 * session_index)
-{
-  if (thread_index)
-    {
-      *thread_index = ed_value_get_thread_index (value);
-    }
-  if (session_index)
-    {
-      *session_index = ed_value_get_session_index (value);
-    }
-}
-
 always_inline void
 split_ed_kv (clib_bihash_kv_16_8_t * kv,
             ip4_address_t * l_addr, ip4_address_t * r_addr, u8 * proto,
@@ -566,15 +518,12 @@ split_ed_kv (clib_bihash_kv_16_8_t * kv,
 }
 
 static_always_inline int
-get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index,
-                    u32 thread_index, u32 session_index,
-                    nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port,
-                    clib_bihash_kv_16_8_t * kv)
+nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
+                                   ip4_address_t *lookup_saddr,
+                                   u16 *lookup_sport,
+                                   ip4_address_t *lookup_daddr,
+                                   u16 *lookup_dport, u8 *lookup_protocol)
 {
-  u8 proto;
-  u16 _l_port, _r_port;
-  ip4_address_t *l_addr, *r_addr;
-
   icmp46_header_t *icmp0;
   icmp_echo_header_t *echo0, *inner_echo0 = 0;
   ip4_header_t *inner_ip0 = 0;
@@ -584,121 +533,43 @@ get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index,
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   echo0 = (icmp_echo_header_t *) (icmp0 + 1);
 
-  if (!icmp_type_is_error_message
-      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
-    {
-      proto = IP_PROTOCOL_ICMP;
-      l_addr = &ip0->src_address;
-      r_addr = &ip0->dst_address;
-      _l_port = vnet_buffer (b)->ip.reass.l4_src_port;
-      _r_port = 0;
-    }
-  else
-    {
-      inner_ip0 = (ip4_header_t *) (echo0 + 1);
-      l4_header = ip4_next_header (inner_ip0);
-      proto = inner_ip0->protocol;
-      r_addr = &inner_ip0->src_address;
-      l_addr = &inner_ip0->dst_address;
-      switch (ip_proto_to_nat_proto (inner_ip0->protocol))
-       {
-       case NAT_PROTOCOL_ICMP:
-         inner_icmp0 = (icmp46_header_t *) l4_header;
-         inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
-         _r_port = 0;
-         _l_port = inner_echo0->identifier;
-         break;
-       case NAT_PROTOCOL_UDP:
-       case NAT_PROTOCOL_TCP:
-         _l_port = ((tcp_udp_header_t *) l4_header)->dst_port;
-         _r_port = ((tcp_udp_header_t *) l4_header)->src_port;
-         break;
-       default:
-         return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
-       }
-    }
-  init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto,
-             thread_index, session_index);
-  if (nat_proto)
-    {
-      *nat_proto = ip_proto_to_nat_proto (proto);
-    }
-  if (l_port)
-    {
-      *l_port = _l_port;
-    }
-  if (r_port)
-    {
-      *r_port = _r_port;
-    }
-  return 0;
-}
-
-static_always_inline int
-get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index,
-                    u32 thread_index, u32 session_index,
-                    nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port,
-                    clib_bihash_kv_16_8_t * kv)
-{
-  icmp46_header_t *icmp0;
-  u8 proto;
-  ip4_address_t *l_addr, *r_addr;
-  u16 _l_port, _r_port;
-  icmp_echo_header_t *echo0, *inner_echo0 = 0;
-  ip4_header_t *inner_ip0;
-  void *l4_header = 0;
-  icmp46_header_t *inner_icmp0;
-
-  icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
-  echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+  // avoid warning about unused variables in caller by setting to bogus values
+  *lookup_sport = 0;
+  *lookup_dport = 0;
 
   if (!icmp_type_is_error_message
       (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
     {
-      proto = IP_PROTOCOL_ICMP;
-      l_addr = &ip0->dst_address;
-      r_addr = &ip0->src_address;
-      _l_port = vnet_buffer (b)->ip.reass.l4_src_port;
-      _r_port = 0;
+      *lookup_protocol = IP_PROTOCOL_ICMP;
+      lookup_saddr->as_u32 = ip0->src_address.as_u32;
+      *lookup_sport = vnet_buffer (b)->ip.reass.l4_src_port;
+      lookup_daddr->as_u32 = ip0->dst_address.as_u32;
+      *lookup_dport = vnet_buffer (b)->ip.reass.l4_dst_port;
     }
   else
     {
       inner_ip0 = (ip4_header_t *) (echo0 + 1);
       l4_header = ip4_next_header (inner_ip0);
-      proto = inner_ip0->protocol;
-      l_addr = &inner_ip0->src_address;
-      r_addr = &inner_ip0->dst_address;
+      *lookup_protocol = inner_ip0->protocol;
+      lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32;
+      lookup_daddr->as_u32 = inner_ip0->src_address.as_u32;
       switch (ip_proto_to_nat_proto (inner_ip0->protocol))
        {
        case NAT_PROTOCOL_ICMP:
          inner_icmp0 = (icmp46_header_t *) l4_header;
          inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
-         _l_port = inner_echo0->identifier;
-         _r_port = 0;
+         *lookup_sport = inner_echo0->identifier;
+         *lookup_dport = inner_echo0->identifier;
          break;
        case NAT_PROTOCOL_UDP:
        case NAT_PROTOCOL_TCP:
-         _l_port = ((tcp_udp_header_t *) l4_header)->src_port;
-         _r_port = ((tcp_udp_header_t *) l4_header)->dst_port;
+         *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port;
+         *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port;
          break;
        default:
-         return -1;
+         return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
        }
     }
-  init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto,
-             thread_index, session_index);
-  if (nat_proto)
-    {
-      *nat_proto = ip_proto_to_nat_proto (proto);
-    }
-  if (l_port)
-    {
-      *l_port = _l_port;
-    }
-  if (r_port)
-    {
-      *r_port = _r_port;
-    }
   return 0;
 }
 
@@ -763,26 +634,17 @@ snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t * node,
 
       snat_interface_t *i;
       /* *INDENT-OFF* */
-      pool_foreach (i, sm->interfaces, ({
+      pool_foreach (i, sm->interfaces)  {
         /* NAT packet aimed at outside interface */
        if ((nat_interface_is_outside (i)) && (sw_if_index == i->sw_if_index))
           return 0;
-      }));
+      }
       /* *INDENT-ON* */
     }
 
   return 1;
 }
 
-static inline void
-increment_v4_address (ip4_address_t * a)
-{
-  u32 v;
-
-  v = clib_net_to_host_u32 (a->as_u32) + 1;
-  a->as_u32 = clib_host_to_net_u32 (v);
-}
-
 static_always_inline u16
 snat_random_port (u16 min, u16 max)
 {