nat: ED: global session LRU list
[vpp.git] / src / plugins / nat / nat_inlines.h
index 64b1f25..e5f2d96 100644 (file)
 #include <nat/nat.h>
 #include <nat/nat_ha.h>
 
+/**
+ * Common body of the NAT "pre" graph nodes.
+ *
+ * For every buffer in the frame, the value produced by vnet_feature_next()
+ * is saved in the buffer's NAT opaque area (arc_next) and the buffer is
+ * enqueued to def_next.  Traced buffers get a nat_pre_trace_t record holding
+ * the next index they were sent to.
+ *
+ * @param vm       vlib main structure
+ * @param node     runtime data of the calling node
+ * @param frame    frame holding the buffer indices to process
+ * @param def_next next index every buffer is enqueued to
+ * @return number of buffers in the frame (frame->n_vectors)
+ */
+static inline uword
+nat_pre_node_fn_inline (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame, u32 def_next)
+{
+  u32 n_left_from, *from, *to_next;
+  u16 next_index;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* dual loop: needs 4 buffers left because from[2] and from[3] are
+         prefetched while from[0] and from[1] are processed */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 next0, next1;
+          u32 arc_next0, arc_next1;
+          u32 bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+          }
+
+          /* speculatively enqueue b0 and b1 to the current next frame */
+          to_next[0] = bi0 = from[0];
+          to_next[1] = bi1 = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          next0 = def_next;
+          next1 = def_next;
+
+          vnet_feature_next (&arc_next0, b0);
+          vnet_feature_next (&arc_next1, b1);
+
+          nat_buffer_opaque (b0)->arc_next = arc_next0;
+          nat_buffer_opaque (b1)->arc_next = arc_next1;
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+            {
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  nat_pre_trace_t *t =
+                    vlib_add_trace (vm, node, b0, sizeof (*t));
+                  t->next_index = next0;
+                }
+              if (b1->flags & VLIB_BUFFER_IS_TRACED)
+                {
+                  /* the record belongs to b1 and must carry b1's next */
+                  nat_pre_trace_t *t =
+                    vlib_add_trace (vm, node, b1, sizeof (*t));
+                  t->next_index = next1;
+                }
+            }
+
+          /* verify speculative enqueues, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      /* single loop: handles whatever the dual loop left over */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 next0;
+          u32 arc_next0;
+          u32 bi0;
+          vlib_buffer_t *b0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          next0 = def_next;
+          vnet_feature_next (&arc_next0, b0);
+          nat_buffer_opaque (b0)->arc_next = arc_next0;
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              nat_pre_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->next_index = next0;
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
 always_inline u32
 ip_proto_to_snat_proto (u8 ip_proto)
 {
@@ -51,9 +171,9 @@ snat_proto_to_ip_proto (snat_protocol_t snat_proto)
 }
 
 static_always_inline u8
-icmp_is_error_message (icmp46_header_t * icmp)
+icmp_type_is_error_message (u8 icmp_type)
 {
-  switch (icmp->type)
+  switch (icmp_type)
     {
     case ICMP4_destination_unreachable:
     case ICMP4_time_exceeded:
@@ -166,30 +286,36 @@ nat44_delete_user_with_no_session (snat_main_t * sm, snat_user_t * u,
 }
 
 always_inline void
-nat44_delete_session (snat_main_t * sm, snat_session_t * ses,
-                     u32 thread_index)
+nat44_delete_session_internal (snat_main_t * sm, snat_session_t * ses,
+                              u32 thread_index, int global_lru_delete
+                              /* delete from global LRU list */ )
 {
   snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data,
                                                       thread_index);
   clib_bihash_kv_8_8_t kv, value;
-  snat_user_key_t u_key;
   snat_user_t *u;
-
-  nat_log_debug ("session deleted %U", format_snat_session, tsm, ses);
+  const snat_user_key_t u_key = {
+    .addr = ses->in2out.addr,
+    .fib_index = ses->in2out.fib_index
+  };
+  const u8 u_static = snat_is_session_static (ses);
 
   clib_dlist_remove (tsm->list_pool, ses->per_user_index);
   pool_put_index (tsm->list_pool, ses->per_user_index);
+  if (global_lru_delete)
+    {
+      clib_dlist_remove (tsm->global_lru_pool, ses->global_lru_index);
+    }
+  pool_put_index (tsm->global_lru_pool, ses->global_lru_index);
   pool_put (tsm->sessions, ses);
   vlib_set_simple_counter (&sm->total_sessions, thread_index, 0,
                           pool_elts (tsm->sessions));
 
-  u_key.addr = ses->in2out.addr;
-  u_key.fib_index = ses->in2out.fib_index;
   kv.key = u_key.as_u64;
   if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value))
     {
       u = pool_elt_at_index (tsm->users, value.value);
-      if (snat_is_session_static (ses))
+      if (u_static)
        u->nstaticsessions--;
       else
        u->nsessions--;
@@ -198,74 +324,89 @@ nat44_delete_session (snat_main_t * sm, snat_session_t * ses,
     }
 }
 
+/* Delete a session, asking nat44_delete_session_internal() to also unlink
+   it from the global LRU list (global_lru_delete = 1). */
+always_inline void
+nat44_delete_session (snat_main_t * sm, snat_session_t * ses,
+                     u32 thread_index)
+{
+  /* no "return <expr>": this function returns void */
+  nat44_delete_session_internal (sm, ses, thread_index, 1);
+}
+
+/* Delete a session; global_lru_delete is passed through unchanged to
+   nat44_delete_session_internal() and selects whether the session is
+   unlinked from the global LRU list. */
+always_inline void
+nat44_ed_delete_session (snat_main_t * sm, snat_session_t * ses,
+                        u32 thread_index, int global_lru_delete
+                        /* delete from global LRU list */ )
+{
+  /* no "return <expr>": this function returns void */
+  nat44_delete_session_internal (sm, ses, thread_index, global_lru_delete);
+}
+
 /** \brief Set TCP session state.
     @return 1 if session was closed, otherwise 0
 */
 always_inline int
-nat44_set_tcp_session_state_i2o (snat_main_t * sm, snat_session_t * ses,
-                                tcp_header_t * tcp, u32 thread_index)
+nat44_set_tcp_session_state_i2o (snat_main_t * sm, f64 now,
+                                snat_session_t * ses, vlib_buffer_t * b,
+                                u32 thread_index)
 {
-  if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST))
+  /* TCP flags and seq/ack numbers are read from the buffer's ip.reass
+     metadata instead of from a TCP header pointer */
+  u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+  u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
+  u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
+  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
     ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
     ses->state = 0;
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
       (ses->state & NAT44_SES_O2I_SYN))
     ses->state = 0;
-  if (tcp->flags & TCP_FLAG_SYN)
+  if (tcp_flags & TCP_FLAG_SYN)
     ses->state |= NAT44_SES_I2O_SYN;
-  if (tcp->flags & TCP_FLAG_FIN)
+  if (tcp_flags & TCP_FLAG_FIN)
     {
-      ses->i2o_fin_seq = clib_net_to_host_u32 (tcp->seq_number);
+      ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
       ses->state |= NAT44_SES_I2O_FIN;
     }
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
-    {
-      if (clib_net_to_host_u32 (tcp->ack_number) > ses->o2i_fin_seq)
-       ses->state |= NAT44_SES_O2I_FIN_ACK;
-    }
-  if (nat44_is_ses_closed (ses)
-      && !(ses->flags & SNAT_SESSION_FLAG_OUTPUT_FEATURE))
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
     {
-      nat_log_debug ("TCP close connection %U", format_snat_session,
-                    &sm->per_thread_data[thread_index], ses);
-      nat_free_session_data (sm, ses, thread_index, 0);
-      nat44_delete_session (sm, ses, thread_index);
-      return 1;
+      /* NOTE(review): plain '>' on 32-bit values - sequence number
+         wrap-around is not taken into account; confirm this is acceptable */
+      if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
+       {
+         ses->state |= NAT44_SES_O2I_FIN_ACK;
+         if (nat44_is_ses_closed (ses))
+           {                   // if session is now closed, save the timestamp
+             ses->tcp_close_timestamp = now + sm->tcp_transitory_timeout;
+           }
+       }
     }
+  /* NOTE(review): the session is no longer freed here, only stamped with
+     tcp_close_timestamp, so every path returns 0; the "@return 1" text in
+     the header comment looks stale and thread_index is no longer used */
   return 0;
 }
 
+/* Update the TCP state bits of a session from an out2in segment.
+   tcp_ack_number and tcp_seq_number are converted with
+   clib_net_to_host_u32() before use. */
 always_inline int
-nat44_set_tcp_session_state_o2i (snat_main_t * sm, snat_session_t * ses,
-                                tcp_header_t * tcp, u32 thread_index)
+nat44_set_tcp_session_state_o2i (snat_main_t * sm, f64 now,
+                                snat_session_t * ses, u8 tcp_flags,
+                                u32 tcp_ack_number, u32 tcp_seq_number,
+                                u32 thread_index)
 {
-  if ((ses->state == 0) && (tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
     ses->state = NAT44_SES_RST;
-  if ((ses->state == NAT44_SES_RST) && !(tcp->flags & TCP_FLAG_RST))
+  if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
     ses->state = 0;
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
       (ses->state & NAT44_SES_O2I_SYN))
     ses->state = 0;
-  if (tcp->flags & TCP_FLAG_SYN)
+  if (tcp_flags & TCP_FLAG_SYN)
     ses->state |= NAT44_SES_O2I_SYN;
-  if (tcp->flags & TCP_FLAG_FIN)
+  if (tcp_flags & TCP_FLAG_FIN)
     {
-      ses->o2i_fin_seq = clib_net_to_host_u32 (tcp->seq_number);
+      ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
       ses->state |= NAT44_SES_O2I_FIN;
     }
-  if ((tcp->flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
+  if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
     {
-      if (clib_net_to_host_u32 (tcp->ack_number) > ses->i2o_fin_seq)
+      /* NOTE(review): plain '>' on 32-bit values - sequence number
+         wrap-around is not taken into account; confirm this is acceptable */
+      if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
        ses->state |= NAT44_SES_I2O_FIN_ACK;
-    }
-  if (nat44_is_ses_closed (ses))
-    {
-      nat_log_debug ("TCP close connection %U", format_snat_session,
-                    &sm->per_thread_data[thread_index], ses);
-      nat_free_session_data (sm, ses, thread_index, 0);
-      nat44_delete_session (sm, ses, thread_index);
-      return 1;
+      /* NOTE(review): unlike nat44_set_tcp_session_state_i2o, this check
+         is not nested under the ack-number test, so it runs for every ACK
+         seen while NAT44_SES_I2O_FIN is set - confirm this is intended */
+      if (nat44_is_ses_closed (ses))
+       {                       // if session is now closed, save the timestamp
+         ses->tcp_close_timestamp = now + sm->tcp_transitory_timeout;
+       }
     }
+  /* NOTE(review): the session is no longer freed here, only stamped with
+     tcp_close_timestamp, so every path returns 0 and thread_index is no
+     longer used */
   return 0;
 }
@@ -311,10 +452,22 @@ always_inline void
 nat44_session_update_lru (snat_main_t * sm, snat_session_t * s,
                          u32 thread_index)
 {
-  clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
-                    s->per_user_index);
-  clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
-                     s->per_user_list_head_index, s->per_user_index);
+  /* don't update too often - timeout is in a magnitude of seconds anyway */
+  if (s->last_heard > s->last_lru_update + 1)
+    {
+      clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
+                        s->per_user_index);
+      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
+                         s->per_user_list_head_index, s->per_user_index);
+
+      clib_dlist_remove (sm->per_thread_data[thread_index].global_lru_pool,
+                        s->global_lru_index);
+      clib_dlist_addtail (sm->per_thread_data[thread_index].global_lru_pool,
+                         sm->
+                         per_thread_data[thread_index].global_lru_head_index,
+                         s->global_lru_index);
+      s->last_lru_update = s->last_heard;
+    }
 }
 
 always_inline void
@@ -349,6 +502,109 @@ make_sm_kv (clib_bihash_kv_8_8_t * kv, ip4_address_t * addr, u8 proto,
   kv->value = ~0ULL;
 }
 
+/**
+ * Derive the endpoint-dependent session key for an ICMP packet on the
+ * in2out path.
+ *
+ * If the ICMP type cached in the buffer metadata is not an error message,
+ * the key comes from the outer IP header (local = source, remote =
+ * destination) with the cached l4_src_port as local port.  For ICMP error
+ * messages the key comes from the embedded packet, with local/remote
+ * swapped relative to that packet's source/destination.
+ *
+ * @param b      buffer carrying the packet (its ip.reass metadata is read)
+ * @param ip0    outer IPv4 header
+ * @param p_key0 output key; written only on success
+ * @return 0 on success, NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL when the
+ *         embedded protocol does not map to SNAT_PROTOCOL_ICMP/UDP/TCP
+ */
+static_always_inline int
+get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0,
+                    nat_ed_ses_key_t * p_key0)
+{
+  nat_ed_ses_key_t key;
+  icmp46_header_t *icmp = (icmp46_header_t *) ip4_next_header (ip0);
+  icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+
+  if (icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
+    {
+      /* error message: key is built from the packet embedded after the
+         echo header */
+      ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
+      void *inner_l4 = ip4_next_header (inner_ip);
+
+      key.proto = inner_ip->protocol;
+      key.r_addr = inner_ip->src_address;
+      key.l_addr = inner_ip->dst_address;
+
+      switch (ip_proto_to_snat_proto (inner_ip->protocol))
+        {
+        case SNAT_PROTOCOL_ICMP:
+          {
+            icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+            icmp_echo_header_t *inner_echo =
+              (icmp_echo_header_t *) (inner_icmp + 1);
+
+            key.l_port = inner_echo->identifier;
+            key.r_port = 0;
+          }
+          break;
+        case SNAT_PROTOCOL_UDP:
+        case SNAT_PROTOCOL_TCP:
+          {
+            tcp_udp_header_t *inner_l4_hdr = (tcp_udp_header_t *) inner_l4;
+
+            key.l_port = inner_l4_hdr->dst_port;
+            key.r_port = inner_l4_hdr->src_port;
+          }
+          break;
+        default:
+          return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
+        }
+    }
+  else
+    {
+      key.proto = IP_PROTOCOL_ICMP;
+      key.l_addr = ip0->src_address;
+      key.r_addr = ip0->dst_address;
+      /* TODO should this be src or dst? */
+      key.l_port = vnet_buffer (b)->ip.reass.l4_src_port;
+      key.r_port = 0;
+    }
+
+  *p_key0 = key;
+  return 0;
+}
+
+
+/**
+ * Derive the endpoint-dependent session key for an ICMP packet on the
+ * out2in path.
+ *
+ * If the ICMP type cached in the buffer metadata is not an error message,
+ * the key comes from the outer IP header (local = destination, remote =
+ * source) with the cached l4_src_port as local port.  For ICMP error
+ * messages the key comes from the embedded packet (local = its source,
+ * remote = its destination).
+ *
+ * @param b      buffer carrying the packet (its ip.reass metadata is read)
+ * @param ip0    outer IPv4 header
+ * @param p_key0 output key; written only on success
+ * @return 0 on success, -1 when the embedded protocol does not map to
+ *         SNAT_PROTOCOL_ICMP/UDP/TCP
+ */
+static_always_inline int
+get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0,
+                    nat_ed_ses_key_t * p_key0)
+{
+  nat_ed_ses_key_t key;
+  icmp46_header_t *icmp = (icmp46_header_t *) ip4_next_header (ip0);
+  icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+
+  if (icmp_type_is_error_message
+      (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
+    {
+      /* error message: key is built from the packet embedded after the
+         echo header */
+      ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
+      void *inner_l4 = ip4_next_header (inner_ip);
+
+      key.proto = inner_ip->protocol;
+      key.l_addr = inner_ip->src_address;
+      key.r_addr = inner_ip->dst_address;
+
+      switch (ip_proto_to_snat_proto (inner_ip->protocol))
+        {
+        case SNAT_PROTOCOL_ICMP:
+          {
+            icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+            icmp_echo_header_t *inner_echo =
+              (icmp_echo_header_t *) (inner_icmp + 1);
+
+            key.l_port = inner_echo->identifier;
+            key.r_port = 0;
+          }
+          break;
+        case SNAT_PROTOCOL_UDP:
+        case SNAT_PROTOCOL_TCP:
+          {
+            tcp_udp_header_t *inner_l4_hdr = (tcp_udp_header_t *) inner_l4;
+
+            key.l_port = inner_l4_hdr->src_port;
+            key.r_port = inner_l4_hdr->dst_port;
+          }
+          break;
+        default:
+          return -1;
+        }
+    }
+  else
+    {
+      key.proto = IP_PROTOCOL_ICMP;
+      key.l_addr = ip0->dst_address;
+      key.r_addr = ip0->src_address;
+      /* TODO should this be src or dst? */
+      key.l_port = vnet_buffer (b)->ip.reass.l4_src_port;
+      key.r_port = 0;
+    }
+
+  *p_key0 = key;
+  return 0;
+}
+
 always_inline void
 mss_clamping (snat_main_t * sm, tcp_header_t * tcp, ip_csum_t * sum)
 {
@@ -457,18 +713,27 @@ snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t * node,
        return 1;
 
       snat_interface_t *i;
-      pool_foreach (i, sm->interfaces, (
-                                        {
-                                        /* NAT packet aimed at outside interface */
-                                        if ((nat_interface_is_outside (i))
-                                            && (sw_if_index ==
-                                                i->sw_if_index)) return 0;}
-                   ));
+      /* *INDENT-OFF* */
+      pool_foreach (i, sm->interfaces, ({
+        /* NAT packet aimed at outside interface */
+       if ((nat_interface_is_outside (i)) && (sw_if_index == i->sw_if_index))
+          return 0;
+      }));
+      /* *INDENT-ON* */
     }
 
   return 1;
 }
 
+/* Add one, in place, to an IPv4 address stored in network byte order:
+   the value is converted to host order, incremented and converted back. */
+static inline void
+increment_v4_address (ip4_address_t * a)
+{
+  u32 host_order = clib_net_to_host_u32 (a->as_u32);
+
+  host_order += 1;
+  a->as_u32 = clib_host_to_net_u32 (host_order);
+}
+
 #endif /* __included_nat_inlines_h__ */
 
 /*