acl-plugin: avoid crash in multithreaded setup adding/deleting ACLs with traffic...
[vpp.git] / src / plugins / snat / in2out.c
index 89054a0..abe0d9d 100644 (file)
@@ -88,13 +88,19 @@ vlib_node_registration_t snat_in2out_slowpath_node;
 vlib_node_registration_t snat_in2out_fast_node;
 vlib_node_registration_t snat_in2out_worker_handoff_node;
 vlib_node_registration_t snat_det_in2out_node;
+vlib_node_registration_t snat_in2out_output_node;
+vlib_node_registration_t snat_in2out_output_slowpath_node;
+vlib_node_registration_t snat_in2out_output_worker_handoff_node;
+vlib_node_registration_t snat_hairpin_dst_node;
+vlib_node_registration_t snat_hairpin_src_node;
+
 
 #define foreach_snat_in2out_error                       \
 _(UNSUPPORTED_PROTOCOL, "Unsupported protocol")         \
 _(IN2OUT_PACKETS, "Good in2out packets processed")      \
 _(OUT_OF_PORTS, "Out of ports")                         \
 _(BAD_OUTSIDE_FIB, "Outside VRF ID not found")          \
-_(BAD_ICMP_TYPE, "icmp type not echo-request")          \
+_(BAD_ICMP_TYPE, "unsupported ICMP type")               \
 _(NO_TRANSLATION, "No translation")
   
 typedef enum {
@@ -118,6 +124,14 @@ typedef enum {
   SNAT_IN2OUT_N_NEXT,
 } snat_in2out_next_t;
 
+typedef enum {   /* next-node indices, all prefixed SNAT_HAIRPIN_SRC_; presumably used by snat_hairpin_src_node -- confirm against its registration */
+  SNAT_HAIRPIN_SRC_NEXT_DROP,   /* first enumerator, value 0 */
+  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
+  SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,   /* NOTE(review): "WH" looks like worker handoff -- confirm */
+  SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
+  SNAT_HAIRPIN_SRC_N_NEXT,   /* last enumerator, so its value equals the number of entries above */
+} snat_hairpin_next_t;   /* note: the typedef name omits the "src" part carried by the enumerators */
+
 /**
  * @brief Check if packet should be translated
  *
@@ -194,7 +208,7 @@ snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
     {
       /* or is static mappings */
-      if (!snat_static_mapping_match(sm, key0, &sm0, 1))
+      if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
         return 0;
     }
   else
@@ -297,28 +311,53 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
                                  session_index);
       } while (snat_is_session_static (s));
 
-      /* Remove in2out, out2in keys */
-      kv0.key = s->in2out.as_u64;
-      if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
-          clib_warning ("in2out key delete failed");
-      kv0.key = s->out2in.as_u64;
-      if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
-          clib_warning ("out2in key delete failed");
-
-      /* log NAT event */
-      snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
-                                          s->out2in.addr.as_u32,
-                                          s->in2out.protocol,
-                                          s->in2out.port,
-                                          s->out2in.port,
-                                          s->in2out.fib_index);
-
-      snat_free_outside_address_and_port 
-        (sm, &s->out2in, s->outside_address_index);
+      if (snat_is_unk_proto_session (s))
+        {
+          clib_bihash_kv_16_8_t up_kv;
+          snat_unk_proto_ses_key_t key;
+
+          /* Remove from lookup tables */
+          key.l_addr = s->in2out.addr;
+          key.r_addr = s->ext_host_addr;
+          key.fib_index = s->in2out.fib_index;
+          key.proto = s->in2out.port;
+          up_kv.key[0] = key.as_u64[0];
+          up_kv.key[1] = key.as_u64[1];
+          if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0))
+            clib_warning ("in2out key del failed");
+
+          key.l_addr = s->out2in.addr;
+          key.fib_index = s->out2in.fib_index;
+          up_kv.key[0] = key.as_u64[0];
+          up_kv.key[1] = key.as_u64[1];
+          if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0))
+            clib_warning ("out2in key del failed");
+        }
+      else
+        {
+          /* Remove in2out, out2in keys */
+          kv0.key = s->in2out.as_u64;
+          if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
+              clib_warning ("in2out key delete failed");
+          kv0.key = s->out2in.as_u64;
+          if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
+              clib_warning ("out2in key delete failed");
+
+          /* log NAT event */
+          snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
+                                              s->out2in.addr.as_u32,
+                                              s->in2out.protocol,
+                                              s->in2out.port,
+                                              s->out2in.port,
+                                              s->in2out.fib_index);
+
+          snat_free_outside_address_and_port
+            (sm, &s->out2in, s->outside_address_index);
+        }
       s->outside_address_index = ~0;
 
-      if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
-                                               &address_index))
+      if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, thread_index,
+                                               &key1, &address_index))
         {
           ASSERT(0);
 
@@ -332,11 +371,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
       u8 static_mapping = 1;
 
       /* First try to match static mapping by local address and port */
-      if (snat_static_mapping_match (sm, *key0, &key1, 0))
+      if (snat_static_mapping_match (sm, *key0, &key1, 0, 0))
         {
           static_mapping = 0;
           /* Try to create dynamic translation */
-          if (snat_alloc_outside_address_and_port (sm, rx_fib_index0, &key1,
+          if (snat_alloc_outside_address_and_port (sm, rx_fib_index0,
+                                                   thread_index, &key1,
                                                    &address_index))
             {
               b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
@@ -383,6 +423,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   s->out2in = key1;
   s->out2in.protocol = key0->protocol;
   s->out2in.fib_index = outside_fib_index;
+  s->ext_host_addr.as_u32 = ip0->dst_address.as_u32;
   *sessionp = s;
 
   /* Add to translation hashes */
@@ -489,8 +530,13 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
   clib_bihash_kv_8_8_t kv0, value0;
   u32 next0 = ~0;
   int err;
+  u32 iph_offset0 = 0;
 
-  ip0 = vlib_buffer_get_current (b0);
+  if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0))
+    {
+      iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+    }
+  ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
   icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
   sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
   rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
@@ -509,14 +555,16 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
   if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
     {
       if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
-          IP_PROTOCOL_ICMP, rx_fib_index0)))
+          IP_PROTOCOL_ICMP, rx_fib_index0) &&
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0))
         {
           dont_translate = 1;
           goto out;
         }
 
-      if (icmp_is_error_message (icmp0))
+      if (PREDICT_FALSE(icmp_is_error_message (icmp0)))
         {
+          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
           next0 = SNAT_IN2OUT_NEXT_DROP;
           goto out;
         }
@@ -528,15 +576,18 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
         goto out;
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                            value0.value);
-
-  if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
-                    !icmp_is_error_message (icmp0)))
     {
-      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
-      next0 = SNAT_IN2OUT_NEXT_DROP;
-      goto out;
+      if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+                        icmp0->type != ICMP4_echo_reply &&
+                        !icmp_is_error_message (icmp0)))
+        {
+          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+          next0 = SNAT_IN2OUT_NEXT_DROP;
+          goto out;
+        }
+
+      s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
+                              value0.value);
     }
 
 out:
@@ -574,6 +625,7 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
   snat_session_key_t key0;
   snat_session_key_t sm0;
   u8 dont_translate = 0;
+  u8 is_addr_only;
   u32 next0 = ~0;
   int err;
 
@@ -591,7 +643,7 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
     }
   key0.fib_index = rx_fib_index0;
 
-  if (snat_static_mapping_match(sm, key0, &sm0, 0))
+  if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only))
     {
       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
           IP_PROTOCOL_ICMP, rx_fib_index0)))
@@ -612,14 +664,12 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
     }
 
   if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request &&
+                    (icmp0->type != ICMP4_echo_reply || !is_addr_only) &&
                     !icmp_is_error_message (icmp0)))
     {
-      if (icmp0->type != ICMP4_echo_reply || key0.port != sm0.port)
-        {
-          b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
-          next0 = SNAT_IN2OUT_NEXT_DROP;
-          goto out;
-        }
+      b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+      next0 = SNAT_IN2OUT_NEXT_DROP;
+      goto out;
     }
 
 out:
@@ -675,7 +725,8 @@ static inline u32 icmp_in2out (snat_main_t *sm,
 
   old_addr0 = ip0->src_address.as_u32;
   new_addr0 = ip0->src_address.as_u32 = sm0.addr.as_u32;
-  vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+  if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)
+    vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
 
   sum0 = ip0->checksum;
   sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t,
@@ -792,7 +843,7 @@ snat_hairpinning (snat_main_t *sm,
   if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
     {
       /* or static mappings */
-      if (!snat_static_mapping_match(sm, key0, &sm0, 1))
+      if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
         {
           new_dst_addr0 = sm0.addr.as_u32;
           new_dst_port0 = sm0.port;
@@ -851,7 +902,91 @@ snat_hairpinning (snat_main_t *sm,
               udp0->checksum = 0;
             }
         }
+      else
+        {
+          if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+            {
+              sum0 = tcp0->checksum;
+              sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                                     ip4_header_t, dst_address);
+              tcp0->checksum = ip_csum_fold(sum0);
+            }
+        }
+    }
+}
+
+/**
+ * @brief ICMP hairpinning: retarget a packet whose destination matches an
+ *        out2in session or a static mapping.
+ *
+ * ICMP error messages are left untouched. For every other ICMP message the
+ * destination address and the echo identifier are looked up in the out2in
+ * session hash; on a miss snat_static_mapping_match() is consulted.
+ *  - static mapping hit: only the destination address and the TX FIB index
+ *    are taken from the mapping;
+ *  - session hit: the destination address, the TX FIB index and the echo
+ *    identifier are taken from the session's in2out key and the ICMP
+ *    checksum is patched for the identifier change.
+ * When a new destination address was found, the IP header is rewritten and
+ * its checksum updated incrementally.
+ *
+ * @param sm    NAT main structure.
+ * @param b0    Buffer holding the packet (its TX FIB index may be set).
+ * @param ip0   IPv4 header of the packet (destination may be rewritten).
+ * @param icmp0 ICMP header of the packet (identifier/checksum may change).
+ */
+static inline void
+snat_icmp_hairpinning (snat_main_t *sm,
+                       vlib_buffer_t * b0,
+                       ip4_header_t * ip0,
+                       icmp46_header_t * icmp0)
+{
+  snat_session_key_t key0, sm0;
+  clib_bihash_kv_8_8_t kv0, value0;
+  snat_worker_key_t k0;
+  u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0;
+  ip_csum_t sum0;
+  snat_session_t *s0;
+
+  if (!icmp_is_error_message (icmp0))
+    {
+      icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1);
+      u16 icmp_id0 = echo0->identifier;
+      key0.addr = ip0->dst_address;
+      key0.port = icmp_id0;
+      key0.protocol = SNAT_PROTOCOL_ICMP;
+      key0.fib_index = sm->outside_fib_index;
+      kv0.key = key0.as_u64;
+
+      /* Check if destination is in active sessions */
+      if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+        {
+          /* or static mappings */
+          if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0))
+            {
+              new_dst_addr0 = sm0.addr.as_u32;
+              vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+            }
+        }
+      else
+        {
+          si = value0.value;
+          if (sm->num_workers > 1)
+            {
+              k0.addr = ip0->dst_address;
+              k0.port = icmp_id0;
+              k0.fib_index = sm->outside_fib_index;
+              kv0.key = k0.as_u64;
+              if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+                {
+                  /* The owning worker is unknown. ASSERT() compiles out in
+                     release images, so bail out explicitly rather than
+                     index thread 0's pool with a foreign session index. */
+                  ASSERT(0);
+                  return;
+                }
+              ti = value0.value;
+            }
+          else
+            ti = sm->num_workers;
+
+          s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+          new_dst_addr0 = s0->in2out.addr.as_u32;
+          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+          echo0->identifier = s0->in2out.port;
+          sum0 = icmp0->checksum;
+          sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
+                                 icmp_echo_header_t, identifier);
+          icmp0->checksum = ip_csum_fold (sum0);
+        }
+
+      /* Destination is behind the same NAT, use internal address and port */
+      if (new_dst_addr0)
+        {
+          old_dst_addr0 = ip0->dst_address.as_u32;
+          ip0->dst_address.as_u32 = new_dst_addr0;
+          sum0 = ip0->checksum;
+          sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+                                 ip4_header_t, dst_address);
+          ip0->checksum = ip_csum_fold (sum0);
+        }
     }
+
 }
 
 static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
@@ -871,6 +1006,9 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
   snat_session_t * s0 = *p_s0;
   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
     {
+      /* Hairpinning */
+      if (vnet_buffer(b0)->sw_if_index[VLIB_TX] == 0)
+        snat_icmp_hairpinning(sm, b0, ip0, icmp0);
       /* Accounting */
       s0->last_heard = now;
       s0->total_pkts++;
@@ -887,11 +1025,352 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
     }
   return next0;
 }
+/**
+ * @brief Hairpinning for packets handled by the "unknown protocol" tables.
+ *
+ * Builds a (destination, source, outside FIB, IP protocol) key and searches
+ * the out2in_unk_proto hash. On a hit the inside address and FIB come from
+ * the matching session (located through worker_by_out when more than one
+ * worker is configured). On a miss the static_mapping_by_external hash is
+ * tried with port and protocol set to 0. If neither lookup succeeds the
+ * packet is left unchanged.
+ *
+ * On success the destination address is rewritten, the IP checksum is
+ * updated incrementally, and the TX FIB index is filled in only when it is
+ * still ~0.
+ *
+ * @param sm NAT main structure.
+ * @param b  Buffer holding the packet.
+ * @param ip IPv4 header of the packet.
+ */
+static inline void
+snat_hairpinning_unknown_proto (snat_main_t *sm,
+                                vlib_buffer_t * b,
+                                ip4_header_t * ip)
+{
+  clib_bihash_kv_16_8_t ses_kv, ses_hit;
+  clib_bihash_kv_8_8_t kv8, hit8;
+  snat_unk_proto_ses_key_t ses_key;
+  u32 orig_dst = ip->dst_address.as_u32;
+  u32 inside_addr, inside_fib;
+  ip_csum_t csum;
+
+  /* Session key as seen from the outside: local = destination. */
+  ses_key.l_addr.as_u32 = ip->dst_address.as_u32;
+  ses_key.r_addr.as_u32 = ip->src_address.as_u32;
+  ses_key.fib_index = sm->outside_fib_index;
+  ses_key.proto = ip->protocol;
+  ses_key.rsvd[0] = ses_key.rsvd[1] = ses_key.rsvd[2] = 0;
+  ses_kv.key[0] = ses_key.as_u64[0];
+  ses_kv.key[1] = ses_key.as_u64[1];
+
+  if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &ses_kv, &ses_hit))
+    {
+      /* Active session: find the thread whose pool holds it. */
+      snat_session_t *ses;
+      u32 owner = sm->num_workers;
+
+      if (sm->num_workers > 1)
+        {
+          snat_worker_key_t wk;
+
+          wk.addr = ip->dst_address;
+          wk.port = 0;
+          wk.fib_index = sm->outside_fib_index;
+          kv8.key = wk.as_u64;
+          if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv8, &hit8))
+            return;
+          owner = hit8.value;
+        }
+
+      ses = pool_elt_at_index (sm->per_thread_data[owner].sessions,
+                               ses_hit.value);
+      inside_fib = ses->in2out.fib_index;
+      inside_addr = ses->in2out.addr.as_u32;
+    }
+  else
+    {
+      /* No session: fall back to a static mapping keyed by address only. */
+      snat_session_key_t map_key;
+      snat_static_mapping_t *map;
+
+      map_key.addr = ip->dst_address;
+      map_key.fib_index = sm->outside_fib_index;
+      map_key.port = 0;
+      map_key.protocol = 0;
+      kv8.key = map_key.as_u64;
+      if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv8, &hit8))
+        return;
+
+      map = pool_elt_at_index (sm->static_mappings, hit8.value);
+      inside_fib = map->fib_index;
+      inside_addr = map->local_addr.as_u32;
+    }
+
+  /* Keep a TX FIB index that was already chosen upstream. */
+  if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+    vnet_buffer(b)->sw_if_index[VLIB_TX] = inside_fib;
+  ip->dst_address.as_u32 = inside_addr;
+
+  csum = ip->checksum;
+  csum = ip_csum_update (csum, orig_dst, inside_addr, ip4_header_t,
+                         dst_address);
+  ip->checksum = ip_csum_fold (csum);
+}
+
+/**
+ * @brief In2out source translation for packets handled by the "unknown
+ *        protocol" session tables.
+ *
+ * Sessions are keyed by (local address, remote address, FIB index, IP
+ * protocol) in the in2out_unk_proto / out2in_unk_proto hashes.
+ *
+ * Existing session: its outside address is reused.
+ * No session: the user record is found or created, then an outside address
+ * is chosen in this order:
+ *   1. a static mapping keyed by the inside address (port/protocol 0);
+ *   2. the outside address of one of the user's sessions to the same
+ *      external host, provided its out2in key is still free;
+ *   3. the first configured address whose out2in key is free.
+ * If none is available the packet is left completely unmodified. Otherwise
+ * a session is created, or the least recently used dynamic session of the
+ * user is recycled once the per-user quota is reached.
+ *
+ * Afterwards the IP source address and checksum are rewritten, the session
+ * accounting and LRU position are updated, hairpinning is attempted and the
+ * TX FIB index defaults to the outside FIB if still unset.
+ *
+ * @param sm           NAT main structure.
+ * @param b            Buffer holding the packet.
+ * @param ip           IPv4 header of the packet.
+ * @param rx_fib_index FIB index of the receiving interface.
+ * @param thread_index Index selecting the per-thread data to use.
+ * @param now          Timestamp stored in the session's last_heard.
+ * @param vm           vlib main, used to compute the buffer chain length.
+ */
+static void
+snat_in2out_unknown_proto (snat_main_t *sm,
+                           vlib_buffer_t * b,
+                           ip4_header_t * ip,
+                           u32 rx_fib_index,
+                           u32 thread_index,
+                           f64 now,
+                           vlib_main_t * vm)
+{
+  clib_bihash_kv_8_8_t kv, value;
+  clib_bihash_kv_16_8_t s_kv, s_value;
+  snat_static_mapping_t *m;
+  snat_session_key_t m_key;
+  u32 old_addr, new_addr = 0;
+  ip_csum_t sum;
+  snat_user_key_t u_key;
+  snat_user_t *u;
+  dlist_elt_t *head, *elt, *oldest;
+  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+  u32 elt_index, head_index, ses_index, oldest_index;
+  snat_session_t * s;
+  snat_unk_proto_ses_key_t key;
+  u32 address_index = ~0;
+  int i;
+  u8 is_sm = 0;
+
+  old_addr = ip->src_address.as_u32;
+
+  key.l_addr = ip->src_address;
+  key.r_addr = ip->dst_address;
+  key.fib_index = rx_fib_index;
+  key.proto = ip->protocol;
+  key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
+  s_kv.key[0] = key.as_u64[0];
+  s_kv.key[1] = key.as_u64[1];
+
+  if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value))
+    {
+      s = pool_elt_at_index (tsm->sessions, s_value.value);
+      new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+    }
+  else
+    {
+      u_key.addr = ip->src_address;
+      u_key.fib_index = rx_fib_index;
+      kv.key = u_key.as_u64;
+
+      /* Ever heard of the "user" = src ip4 address before? */
+      if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+        {
+          /* no, make a new one */
+          pool_get (tsm->users, u);
+          memset (u, 0, sizeof (*u));
+          u->addr = ip->src_address;
+          u->fib_index = rx_fib_index;
+
+          pool_get (tsm->list_pool, head);
+          u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+          clib_dlist_init (tsm->list_pool,
+                           u->sessions_per_user_list_head_index);
+
+          kv.value = u - tsm->users;
+
+          /* add user */
+          clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1);
+        }
+      else
+        {
+          u = pool_elt_at_index (tsm->users, value.value);
+        }
+
+      m_key.addr = ip->src_address;
+      m_key.port = 0;
+      m_key.protocol = 0;
+      m_key.fib_index = rx_fib_index;
+      kv.key = m_key.as_u64;
+
+      /* Try to find static mapping first */
+      if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+        {
+          m = pool_elt_at_index (sm->static_mappings, value.value);
+          new_addr = ip->src_address.as_u32 = m->external_addr.as_u32;
+          is_sm = 1;
+          goto create_ses;
+        }
+      /* Fallback to 3-tuple key */
+      else
+        {
+          /* Choose same out address as for TCP/UDP session to same destination */
+          /* kv.key still holds the static-mapping key; the user hash is
+             keyed by the user key, so restore it before searching. */
+          kv.key = u_key.as_u64;
+          if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+            {
+              head_index = u->sessions_per_user_list_head_index;
+              head = pool_elt_at_index (tsm->list_pool, head_index);
+              elt_index = head->next;
+              elt = pool_elt_at_index (tsm->list_pool, elt_index);
+              ses_index = elt->value;
+              while (ses_index != ~0)
+                {
+                  s =  pool_elt_at_index (tsm->sessions, ses_index);
+                  elt_index = elt->next;
+                  elt = pool_elt_at_index (tsm->list_pool, elt_index);
+                  ses_index = elt->value;
+
+                  if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
+                    {
+                      key.fib_index = sm->outside_fib_index;
+                      key.l_addr.as_u32 = s->out2in.addr.as_u32;
+                      s_kv.key[0] = key.as_u64[0];
+                      s_kv.key[1] = key.as_u64[1];
+                      /* A non-zero search result means the out2in key is
+                         free. Only then commit to this address; the packet
+                         is not touched before the check so that a later
+                         bail-out leaves it intact. */
+                      if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+                        {
+                          new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+                          address_index = s->outside_address_index;
+                          goto create_ses;
+                        }
+
+                      /* Address already taken for this remote/protocol:
+                         fall through to the scan of all addresses. */
+                      break;
+                    }
+                }
+            }
+          key.fib_index = sm->outside_fib_index;
+          for (i = 0; i < vec_len (sm->addresses); i++)
+            {
+              key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
+              s_kv.key[0] = key.as_u64[0];
+              s_kv.key[1] = key.as_u64[1];
+              if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+                {
+                  new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
+                  address_index = i;
+                  goto create_ses;
+                }
+            }
+          /* No outside address available; packet is left unmodified. */
+          return;
+        }
+
+create_ses:
+      /* Over quota? Recycle the least recently used dynamic translation */
+      if (u->nsessions >= sm->max_translations_per_user && !is_sm)
+        {
+          /* Remove the oldest dynamic translation */
+          do {
+              oldest_index = clib_dlist_remove_head (
+                tsm->list_pool, u->sessions_per_user_list_head_index);
+
+              ASSERT (oldest_index != ~0);
+
+              /* add it back to the end of the LRU list */
+              clib_dlist_addtail (tsm->list_pool,
+                                  u->sessions_per_user_list_head_index,
+                                  oldest_index);
+              /* Get the list element */
+              oldest = pool_elt_at_index (tsm->list_pool, oldest_index);
+
+              /* Get the session index from the list element */
+              ses_index = oldest->value;
+
+              /* Get the session */
+              s = pool_elt_at_index (tsm->sessions, ses_index);
+          } while (snat_is_session_static (s));
+
+          if (snat_is_unk_proto_session (s))
+            {
+              /* Remove from lookup tables */
+              key.l_addr = s->in2out.addr;
+              key.r_addr = s->ext_host_addr;
+              key.fib_index = s->in2out.fib_index;
+              key.proto = s->in2out.port;
+              s_kv.key[0] = key.as_u64[0];
+              s_kv.key[1] = key.as_u64[1];
+              if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0))
+                clib_warning ("in2out key del failed");
+
+              key.l_addr = s->out2in.addr;
+              key.fib_index = s->out2in.fib_index;
+              s_kv.key[0] = key.as_u64[0];
+              s_kv.key[1] = key.as_u64[1];
+              if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0))
+                clib_warning ("out2in key del failed");
+            }
+          else
+            {
+              /* log NAT event */
+              snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32,
+                                                  s->out2in.addr.as_u32,
+                                                  s->in2out.protocol,
+                                                  s->in2out.port,
+                                                  s->out2in.port,
+                                                  s->in2out.fib_index);
+
+              snat_free_outside_address_and_port (sm, &s->out2in,
+                                                  s->outside_address_index);
+
+              /* Remove in2out, out2in keys */
+              kv.key = s->in2out.as_u64;
+              if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0))
+                clib_warning ("in2out key del failed");
+              kv.key = s->out2in.as_u64;
+              if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0))
+                clib_warning ("out2in key del failed");
+            }
+          /* The recycled dynamic session is replaced one-for-one, so the
+             per-user counters stay as they are. */
+        }
+      else
+        {
+          /* Create a new session */
+          pool_get (tsm->sessions, s);
+          memset (s, 0, sizeof (*s));
+
+          /* Create list elts */
+          pool_get (tsm->list_pool, elt);
+          clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+          elt->value = s - tsm->sessions;
+          s->per_user_index = elt - tsm->list_pool;
+          s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+          clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+                              s->per_user_index);
+
+          /* Only a newly allocated session grows the user's counters */
+          if (is_sm)
+            u->nstaticsessions++;
+          else
+            u->nsessions++;
+        }
+
+      s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
+      s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
+      s->outside_address_index = address_index;
+      s->out2in.addr.as_u32 = new_addr;
+      s->out2in.fib_index = sm->outside_fib_index;
+      s->in2out.addr.as_u32 = old_addr;
+      s->in2out.fib_index = rx_fib_index;
+      /* The port fields carry the IP protocol number for these sessions */
+      s->in2out.port = s->out2in.port = ip->protocol;
+      if (is_sm)
+        s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+
+      /* Add to lookup tables */
+      key.l_addr.as_u32 = old_addr;
+      key.r_addr = ip->dst_address;
+      key.proto = ip->protocol;
+      key.fib_index = rx_fib_index;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
+      s_kv.value = s - tsm->sessions;
+      if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
+        clib_warning ("in2out key add failed");
+
+      key.l_addr.as_u32 = new_addr;
+      key.fib_index = sm->outside_fib_index;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
+      if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
+        clib_warning ("out2in key add failed");
+    }
+
+  /* Update IP checksum */
+  sum = ip->checksum;
+  sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+  ip->checksum = ip_csum_fold (sum);
+
+  /* Accounting */
+  s->last_heard = now;
+  s->total_pkts++;
+  s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+  /* Per-user LRU list maintenance */
+  clib_dlist_remove (tsm->list_pool, s->per_user_index);
+  clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+                      s->per_user_index);
+
+  /* Hairpinning */
+  if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+    snat_hairpinning_unknown_proto(sm, b, ip);
+
+  if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+    vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+}
 
 static inline uword
 snat_in2out_node_fn_inline (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
-                            vlib_frame_t * frame, int is_slow_path)
+                            vlib_frame_t * frame, int is_slow_path,
+                            int is_output_feature)
 {
   u32 n_left_from, * from, * to_next;
   snat_in2out_next_t next_index;
@@ -933,6 +1412,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           u32 proto0, proto1;
           snat_session_t * s0 = 0, * s1 = 0;
           clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
+          u32 iph_offset0 = 0, iph_offset1 = 0;
           
          /* Prefetch next iteration. */
          {
@@ -959,7 +1439,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);
 
-          ip0 = vlib_buffer_get_current (b0);
+          if (is_output_feature)
+            iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+
+          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
+                 iph_offset0);
+
           udp0 = ip4_next_header (ip0);
           tcp0 = (tcp_header_t *) udp0;
           icmp0 = (icmp46_header_t *) udp0;
@@ -986,8 +1471,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           if (is_slow_path)
             {
               if (PREDICT_FALSE (proto0 == ~0))
-                goto trace00;
-              
+                {
+                  snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
+                                             thread_index, now, vm);
+                  goto trace00;
+                }
+
               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                 {
                   next0 = icmp_in2out_slow_path 
@@ -1016,8 +1505,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             {
               if (is_slow_path)
                 {
-                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
-                      proto0, rx_fib_index0)))
+                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+                      ip0, proto0, rx_fib_index0)) && !is_output_feature)
                     goto trace00;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
@@ -1038,7 +1527,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           old_addr0 = ip0->src_address.as_u32;
           ip0->src_address = s0->out2in.addr;
           new_addr0 = ip0->src_address.as_u32;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+          if (!is_output_feature)
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
 
           sum0 = ip0->checksum;
           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
@@ -1069,7 +1559,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             }
 
           /* Hairpinning */
-          snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+          if (!is_output_feature)
+            snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
 
           /* Accounting */
           s0->last_heard = now;
@@ -1101,7 +1592,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
 
           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
 
-          ip1 = vlib_buffer_get_current (b1);
+          if (is_output_feature)
+            iph_offset1 = vnet_buffer (b1)->ip.save_rewrite_length;
+
+          ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) +
+                 iph_offset1);
+
           udp1 = ip4_next_header (ip1);
           tcp1 = (tcp_header_t *) udp1;
           icmp1 = (icmp46_header_t *) udp1;
@@ -1126,8 +1622,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           if (is_slow_path)
             {
               if (PREDICT_FALSE (proto1 == ~0))
-                goto trace01;
-              
+                {
+                  snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1,
+                                             thread_index, now, vm);
+                  goto trace01;
+                }
+
               if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
                 {
                   next1 = icmp_in2out_slow_path 
@@ -1156,8 +1656,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             {
               if (is_slow_path)
                 {
-                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, ip1,
-                      proto1, rx_fib_index1)))
+                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1,
+                      ip1, proto1, rx_fib_index1)) && !is_output_feature)
                     goto trace01;
 
                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
@@ -1178,7 +1678,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           old_addr1 = ip1->src_address.as_u32;
           ip1->src_address = s1->out2in.addr;
           new_addr1 = ip1->src_address.as_u32;
-          vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
+          if (!is_output_feature)
+            vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
 
           sum1 = ip1->checksum;
           sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
@@ -1209,7 +1710,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             }
 
           /* Hairpinning */
-          snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
+          if (!is_output_feature)
+            snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
 
           /* Accounting */
           s1->last_heard = now;
@@ -1264,7 +1766,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           u32 proto0;
           snat_session_t * s0 = 0;
           clib_bihash_kv_8_8_t kv0, value0;
-          
+          u32 iph_offset0 = 0;
+
           /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
@@ -1276,7 +1779,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          b0 = vlib_get_buffer (vm, bi0);
           next0 = SNAT_IN2OUT_NEXT_LOOKUP;
 
-          ip0 = vlib_buffer_get_current (b0);
+          if (is_output_feature)
+            iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length;
+
+          ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
+                 iph_offset0);
+
           udp0 = ip4_next_header (ip0);
           tcp0 = (tcp_header_t *) udp0;
           icmp0 = (icmp46_header_t *) udp0;
@@ -1301,8 +1809,12 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           if (is_slow_path)
             {
               if (PREDICT_FALSE (proto0 == ~0))
-                goto trace0;
-              
+                {
+                  snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0,
+                                             thread_index, now, vm);
+                  goto trace0;
+                }
+
               if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
                 {
                   next0 = icmp_in2out_slow_path 
@@ -1331,8 +1843,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             {
               if (is_slow_path)
                 {
-                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0,
-                      proto0, rx_fib_index0)))
+                  if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0,
+                      ip0, proto0, rx_fib_index0)) && !is_output_feature)
                     goto trace0;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
@@ -1354,7 +1866,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
           old_addr0 = ip0->src_address.as_u32;
           ip0->src_address = s0->out2in.addr;
           new_addr0 = ip0->src_address.as_u32;
-          vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+          if (!is_output_feature)
+            vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
 
           sum0 = ip0->checksum;
           sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
@@ -1385,7 +1898,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
             }
 
           /* Hairpinning */
-          snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+          if (!is_output_feature)
+            snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
 
           /* Accounting */
           s0->last_heard = now;
@@ -1437,7 +1951,7 @@ snat_in2out_fast_path_fn (vlib_main_t * vm,
                           vlib_node_runtime_t * node,
                           vlib_frame_t * frame)
 {
-  return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
+  return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */, 0);
 }
 
 VLIB_REGISTER_NODE (snat_in2out_node) = {
@@ -1446,12 +1960,12 @@ VLIB_REGISTER_NODE (snat_in2out_node) = {
   .vector_size = sizeof (u32),
   .format_trace = format_snat_in2out_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-  
+
   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
   .error_strings = snat_in2out_error_strings,
 
   .runtime_data_bytes = sizeof (snat_runtime_t),
-  
+
   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
 
   /* edit / add dispositions here */
@@ -1465,12 +1979,46 @@ VLIB_REGISTER_NODE (snat_in2out_node) = {
 
 VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
 
+/*
+ * Node function bound to "snat-in2out-output": runs the shared in2out
+ * worker with the slow path disabled and the output-feature flag set.
+ */
+static uword
+snat_in2out_output_fast_path_fn (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 vlib_frame_t * frame)
+{
+  /* Named compile-time constants instead of bare 0/1 literals. */
+  enum { slow_path_off = 0, output_feature_on = 1 };
+
+  return snat_in2out_node_fn_inline (vm, node, frame, slow_path_off,
+                                     output_feature_on);
+}
+
+/*
+ * Registration of the "snat-in2out-output" graph node.
+ *
+ * It shares the trace formatter, error strings, runtime data size and
+ * next-index enum (snat_in2out_next_t) with the other in2out nodes, but
+ * maps the dispositions to output-side nodes.
+ */
+VLIB_REGISTER_NODE (snat_in2out_output_node) = {
+  .function = snat_in2out_output_fast_path_fn,
+  .name = "snat-in2out-output",
+  .vector_size = sizeof (u32),
+  .format_trace = format_snat_in2out_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+
+  .runtime_data_bytes = sizeof (snat_runtime_t),
+
+  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+    /* Despite the enum name, the LOOKUP slot leads to interface-output. */
+    [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
+    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
+    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+  },
+};
+
+/* Multiarch wrapper for the node function registered above. */
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_node,
+                              snat_in2out_output_fast_path_fn);
+
+/*
+ * Slow-path node function for the in2out node: calls the shared inline
+ * worker with is_slow_path set to 1.  The new trailing 0 is the flag the
+ * output-feature variants pass as 1 (presumably is_output_feature).
+ */
 static uword
 snat_in2out_slow_path_fn (vlib_main_t * vm,
                           vlib_node_runtime_t * node,
                           vlib_frame_t * frame)
 {
-  return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
+  return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */, 0);
 }
 
 VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
@@ -1479,12 +2027,12 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
   .vector_size = sizeof (u32),
   .format_trace = format_snat_in2out_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-  
+
   .n_errors = ARRAY_LEN(snat_in2out_error_strings),
   .error_strings = snat_in2out_error_strings,
 
   .runtime_data_bytes = sizeof (snat_runtime_t),
-  
+
   .n_next_nodes = SNAT_IN2OUT_N_NEXT,
 
   /* edit / add dispositions here */
@@ -1496,7 +2044,42 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node,
+                              snat_in2out_slow_path_fn);
+
+/*
+ * Node function bound to "snat-in2out-output-slowpath": runs the shared
+ * in2out worker with both the slow path and the output-feature flag set.
+ */
+static uword
+snat_in2out_output_slow_path_fn (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 vlib_frame_t * frame)
+{
+  /* Named compile-time constants instead of bare 1/1 literals. */
+  enum { slow_path_on = 1, output_feature_on = 1 };
+
+  return snat_in2out_node_fn_inline (vm, node, frame, slow_path_on,
+                                     output_feature_on);
+}
+
+/*
+ * Registration of the "snat-in2out-output-slowpath" graph node.
+ *
+ * Same next-node table as "snat-in2out-output"; the SLOW_PATH disposition
+ * therefore names this node itself.
+ */
+VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = {
+  .function = snat_in2out_output_slow_path_fn,
+  .name = "snat-in2out-output-slowpath",
+  .vector_size = sizeof (u32),
+  .format_trace = format_snat_in2out_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+
+  .runtime_data_bytes = sizeof (snat_runtime_t),
+
+  .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+    /* Despite the enum name, the LOOKUP slot leads to interface-output. */
+    [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output",
+    [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-output-slowpath",
+    [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+  },
+};
+
+/* Multiarch wrapper for the node function registered above. */
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_slowpath_node,
+                              snat_in2out_output_slow_path_fn);
 
 /**************************/
 /*** deterministic mode ***/
@@ -1511,7 +2094,7 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
   u32 pkts_processed = 0;
   snat_main_t * sm = &snat_main;
   u32 now = (u32) vlib_time_now (vm);
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -1613,11 +2196,12 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
 
           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
 
-          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
+          key0.ext_host_addr = ip0->dst_address;
+          key0.ext_host_port = tcp0->dst;
+
+          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
           if (PREDICT_FALSE(!ses0))
             {
-              key0.ext_host_addr = ip0->dst_address;
-              key0.ext_host_port = tcp0->dst;
               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
                 {
                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
@@ -1631,8 +2215,13 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
                 }
               if (PREDICT_FALSE(!ses0))
                 {
-                  next0 = SNAT_IN2OUT_NEXT_DROP;
-                  b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+                  /* too many sessions for user, send ICMP error packet */
+
+                  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
+                                               ICMP4_destination_unreachable_destination_unreachable_host,
+                                               0);
+                  next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
                   goto trace0;
                 }
             }
@@ -1687,16 +2276,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
           switch(ses0->state)
             {
             case SNAT_SESSION_UDP_ACTIVE:
-                ses0->expire = now + SNAT_UDP_TIMEOUT;
+                ses0->expire = now + sm->udp_timeout;
                 break;
             case SNAT_SESSION_TCP_SYN_SENT:
             case SNAT_SESSION_TCP_FIN_WAIT:
             case SNAT_SESSION_TCP_CLOSE_WAIT:
             case SNAT_SESSION_TCP_LAST_ACK:
-                ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
+                ses0->expire = now + sm->tcp_transitory_timeout;
                 break;
             case SNAT_SESSION_TCP_ESTABLISHED:
-                ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
+                ses0->expire = now + sm->tcp_established_timeout;
                 break;
             }
 
@@ -1757,11 +2346,12 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
 
           snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1);
 
-          ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src);
+          key1.ext_host_addr = ip1->dst_address;
+          key1.ext_host_port = tcp1->dst;
+
+          ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src, key1);
           if (PREDICT_FALSE(!ses1))
             {
-              key1.ext_host_addr = ip1->dst_address;
-              key1.ext_host_port = tcp1->dst;
               for (i1 = 0; i1 < dm1->ports_per_host; i1++)
                 {
                   key1.out_port = clib_host_to_net_u16 (lo_port1 +
@@ -1775,8 +2365,13 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
                 }
               if (PREDICT_FALSE(!ses1))
                 {
-                  next1 = SNAT_IN2OUT_NEXT_DROP;
-                  b1->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+                  /* too many sessions for user, send ICMP error packet */
+
+                  vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (b1, ICMP4_destination_unreachable,
+                                               ICMP4_destination_unreachable_destination_unreachable_host,
+                                               0);
+                  next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
                   goto trace1;
                 }
             }
@@ -1831,16 +2426,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
           switch(ses1->state)
             {
             case SNAT_SESSION_UDP_ACTIVE:
-                ses1->expire = now + SNAT_UDP_TIMEOUT;
+                ses1->expire = now + sm->udp_timeout;
                 break;
             case SNAT_SESSION_TCP_SYN_SENT:
             case SNAT_SESSION_TCP_FIN_WAIT:
             case SNAT_SESSION_TCP_CLOSE_WAIT:
             case SNAT_SESSION_TCP_LAST_ACK:
-                ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
+                ses1->expire = now + sm->tcp_transitory_timeout;
                 break;
             case SNAT_SESSION_TCP_ESTABLISHED:
-                ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
+                ses1->expire = now + sm->tcp_established_timeout;
                 break;
             }
 
@@ -1937,11 +2532,12 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
 
           snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0);
 
-          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src);
+          key0.ext_host_addr = ip0->dst_address;
+          key0.ext_host_port = tcp0->dst;
+
+          ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src, key0);
           if (PREDICT_FALSE(!ses0))
             {
-              key0.ext_host_addr = ip0->dst_address;
-              key0.ext_host_port = tcp0->dst;
               for (i0 = 0; i0 < dm0->ports_per_host; i0++)
                 {
                   key0.out_port = clib_host_to_net_u16 (lo_port0 +
@@ -1955,8 +2551,13 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
                 }
               if (PREDICT_FALSE(!ses0))
                 {
-                  next0 = SNAT_IN2OUT_NEXT_DROP;
-                  b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+                  /* too many sessions for user, send ICMP error packet */
+
+                  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (b0, ICMP4_destination_unreachable,
+                                               ICMP4_destination_unreachable_destination_unreachable_host,
+                                               0);
+                  next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR;
                   goto trace00;
                 }
             }
@@ -2011,16 +2612,16 @@ snat_det_in2out_node_fn (vlib_main_t * vm,
           switch(ses0->state)
             {
             case SNAT_SESSION_UDP_ACTIVE:
-                ses0->expire = now + SNAT_UDP_TIMEOUT;
+                ses0->expire = now + sm->udp_timeout;
                 break;
             case SNAT_SESSION_TCP_SYN_SENT:
             case SNAT_SESSION_TCP_FIN_WAIT:
             case SNAT_SESSION_TCP_CLOSE_WAIT:
             case SNAT_SESSION_TCP_LAST_ACK:
-                ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT;
+                ses0->expire = now + sm->tcp_transitory_timeout;
                 break;
             case SNAT_SESSION_TCP_ESTABLISHED:
-                ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT;
+                ses0->expire = now + sm->tcp_established_timeout;
                 break;
             }
 
@@ -2171,7 +2772,10 @@ u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
 
   snat_det_forward(dm0, &in_addr, &new_addr0, &lo_port0);
 
-  ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port);
+  key0.ext_host_addr = ip0->dst_address;
+  key0.ext_host_port = 0;
+
+  ses0 = snat_det_find_ses_by_in(dm0, &in_addr, in_port, key0);
   if (PREDICT_FALSE(!ses0))
     {
       if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
@@ -2186,8 +2790,6 @@ u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
           next0 = SNAT_IN2OUT_NEXT_DROP;
           goto out;
         }
-      key0.ext_host_addr = ip0->dst_address;
-      key0.ext_host_port = 0;
       for (i0 = 0; i0 < dm0->ports_per_host; i0++)
         {
           key0.out_port = clib_host_to_net_u16 (lo_port0 +
@@ -2215,6 +2817,11 @@ u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node,
       goto out;
     }
 
+  u32 now = (u32) vlib_time_now (sm->vlib_main);
+
+  ses0->state = SNAT_SESSION_ICMP_ACTIVE;
+  ses0->expire = now + sm->icmp_timeout;
+
 out:
   *p_proto = protocol;
   if (ses0)
@@ -2234,10 +2841,11 @@ out:
 /**********************/
 /*** worker handoff ***/
 /**********************/
-static uword
-snat_in2out_worker_handoff_fn (vlib_main_t * vm,
-                               vlib_node_runtime_t * node,
-                               vlib_frame_t * frame)
+static inline uword
+snat_in2out_worker_handoff_fn_inline (vlib_main_t * vm,
+                                      vlib_node_runtime_t * node,
+                                      vlib_frame_t * frame,
+                                      u8 is_output)
 {
   snat_main_t *sm = &snat_main;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -2252,9 +2860,22 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
   u32 next_worker_index = 0;
   u32 current_worker_index = ~0;
   u32 thread_index = vlib_get_thread_index ();
+  u32 fq_index;
+  u32 to_node_index;
 
   ASSERT (vec_len (sm->workers));
 
+  if (is_output)
+    {
+      fq_index = sm->fq_in2out_output_index;
+      to_node_index = sm->in2out_output_node_index;
+    }
+  else
+    {
+      fq_index = sm->fq_in2out_index;
+      to_node_index = sm->in2out_node_index;
+    }
+
   if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
     {
       vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
@@ -2298,7 +2919,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
               if (hf)
                 hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
 
-              hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
+              hf = vlib_get_worker_handoff_queue_elt (fq_index,
                                                       next_worker_index,
                                                       handoff_queue_elt_by_worker_index);
 
@@ -2327,7 +2948,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
           /* if this is 1st frame */
           if (!f)
             {
-              f = vlib_get_frame_to_node (vm, sm->in2out_node_index);
+              f = vlib_get_frame_to_node (vm, to_node_index);
               to_next = vlib_frame_vector_args (f);
             }
 
@@ -2347,7 +2968,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
     }
 
   if (f)
-    vlib_put_frame_to_node (vm, sm->in2out_node_index, f);
+    vlib_put_frame_to_node (vm, to_node_index, f);
 
   if (hf)
     hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
@@ -2378,13 +2999,21 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
   return frame->n_vectors;
 }
 
+/*
+ * Node function bound to "snat-in2out-worker-handoff": the shared handoff
+ * routine with is_output cleared, i.e. it uses sm->fq_in2out_index and
+ * sm->in2out_node_index.
+ */
+static uword
+snat_in2out_worker_handoff_fn (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               vlib_frame_t * frame)
+{
+  enum { is_output_off = 0 };
+
+  return snat_in2out_worker_handoff_fn_inline (vm, node, frame,
+                                               is_output_off);
+}
+
 VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
   .function = snat_in2out_worker_handoff_fn,
   .name = "snat-in2out-worker-handoff",
   .vector_size = sizeof (u32),
   .format_trace = format_snat_in2out_worker_handoff_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-  
+
   .n_next_nodes = 1,
 
   .next_nodes = {
@@ -2392,7 +3021,241 @@ VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
   },
 };
 
-VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node,
+                              snat_in2out_worker_handoff_fn);
+
+/*
+ * Node function bound to "snat-in2out-output-worker-handoff": the shared
+ * handoff routine with is_output set, i.e. it uses
+ * sm->fq_in2out_output_index and sm->in2out_output_node_index.
+ */
+static uword
+snat_in2out_output_worker_handoff_fn (vlib_main_t * vm,
+                                      vlib_node_runtime_t * node,
+                                      vlib_frame_t * frame)
+{
+  enum { is_output_on = 1 };
+
+  return snat_in2out_worker_handoff_fn_inline (vm, node, frame,
+                                               is_output_on);
+}
+
+/*
+ * Registration of the "snat-in2out-output-worker-handoff" graph node.
+ *
+ * Only one next node ("error-drop", index 0) is registered.  The handoff
+ * routine passes packets on itself through handoff queue elements and
+ * vlib_get_frame_to_node()/vlib_put_frame_to_node() rather than through
+ * next indices.
+ */
+VLIB_REGISTER_NODE (snat_in2out_output_worker_handoff_node) = {
+  .function = snat_in2out_output_worker_handoff_fn,
+  .name = "snat-in2out-output-worker-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_snat_in2out_worker_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_next_nodes = 1,
+
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+
+/* Multiarch wrapper for the node function registered above. */
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_output_worker_handoff_node,
+                              snat_in2out_output_worker_handoff_fn);
+
+/*
+ * Decide whether a destination address needs hairpinning.
+ *
+ * Returns 1 when dst_addr equals an entry of sm->addresses, or when
+ * sm->static_mapping_by_external holds the key {dst_addr,
+ * sm->outside_fib_index, port 0, protocol 0} (the bihash search returns 0);
+ * returns 0 otherwise.
+ */
+static_always_inline int
+is_hairpinning (snat_main_t *sm, ip4_address_t * dst_addr)
+{
+  const u32 dst = dst_addr->as_u32;
+  snat_address_t * pool_addr;
+  snat_session_key_t lookup;
+  clib_bihash_kv_8_8_t query, result;
+
+  /* First try the configured address vector. */
+  vec_foreach (pool_addr, sm->addresses)
+    if (pool_addr->addr.as_u32 == dst)
+      return 1;
+
+  /* Then look for an address-only key among the external static mappings. */
+  lookup.protocol = 0;
+  lookup.port = 0;
+  lookup.fib_index = sm->outside_fib_index;
+  lookup.addr.as_u32 = dst;
+  query.key = lookup.as_u64;
+
+  return clib_bihash_search_8_8 (&sm->static_mapping_by_external,
+                                 &query, &result) == 0;
+}
+
+/*
+ * Node function for "snat-hairpin-dst".
+ *
+ * Every buffer first gets snat.flags cleared.  If the IPv4 destination
+ * matches is_hairpinning(), the protocol-specific hairpinning helper is
+ * applied (TCP/UDP, ICMP, or the unknown-protocol variant) and snat.flags
+ * is set to SNAT_FLAG_HAIRPINNING.  All packets are enqueued to
+ * SNAT_IN2OUT_NEXT_LOOKUP.
+ *
+ * No per-packet logging is done here: this is data-path code.
+ *
+ * Returns the number of vectors in the input frame.
+ */
+static uword
+snat_hairpin_dst_fn (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  u32 n_left_from, * from, * to_next;
+  snat_in2out_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t * sm = &snat_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          ip4_header_t * ip0;
+          u32 proto0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+          ip0 = vlib_buffer_get_current (b0);
+
+          proto0 = ip_proto_to_snat_proto (ip0->protocol);
+
+          /* Clear the flags before the helpers run; set again below. */
+          vnet_buffer (b0)->snat.flags = 0;
+          if (PREDICT_FALSE (is_hairpinning (sm, &ip0->dst_address)))
+            {
+              if (proto0 == SNAT_PROTOCOL_TCP || proto0 == SNAT_PROTOCOL_UDP)
+                {
+                  udp_header_t * udp0 = ip4_next_header (ip0);
+                  tcp_header_t * tcp0 = (tcp_header_t *) udp0;
+
+                  snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+                }
+              else if (proto0 == SNAT_PROTOCOL_ICMP)
+                {
+                  icmp46_header_t * icmp0 = ip4_next_header (ip0);
+
+                  snat_icmp_hairpinning (sm, b0, ip0, icmp0);
+                }
+              else
+                {
+                  snat_hairpinning_unknown_proto (sm, b0, ip0);
+                }
+
+              /* Mark the buffer so later nodes can test for hairpinning. */
+              vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING;
+            }
+
+          pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, snat_hairpin_dst_node.index,
+                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+                               pkts_processed);
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "snat-hairpin-dst" graph node.
+ *
+ * It reuses the in2out error strings and the first two snat_in2out_next_t
+ * dispositions.  No trace formatter is registered.
+ */
+VLIB_REGISTER_NODE (snat_hairpin_dst_node) = {
+  .function = snat_hairpin_dst_fn,
+  .name = "snat-hairpin-dst",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+  /*
+   * NOTE(review): the literal 2 matches the two entries below only if
+   * SNAT_IN2OUT_NEXT_DROP and SNAT_IN2OUT_NEXT_LOOKUP are indices 0 and 1;
+   * confirm against the snat_in2out_next_t definition.
+   */
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+    [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+  },
+};
+
+/* Multiarch wrapper for the node function registered above. */
+VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_dst_node,
+                              snat_hairpin_dst_fn);
+
+/*
+ * Node function for "snat-hairpin-src".
+ *
+ * Buffers whose snat.flags carry SNAT_FLAG_HAIRPINNING are steered to the
+ * in2out output path: to SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH when
+ * sm->num_workers > 1, otherwise to SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT.
+ * All other buffers go to SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT.
+ *
+ * Next indices here are snat_hairpin_next_t values, so that enum is used
+ * for both next_index and the drop comparison.
+ *
+ * Returns the number of vectors in the input frame.
+ */
+static uword
+snat_hairpin_src_fn (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  u32 n_left_from, * from, * to_next;
+  snat_hairpin_next_t next_index;
+  u32 pkts_processed = 0;
+  snat_main_t *sm = &snat_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          next0 = SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT;
+
+          if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) & SNAT_FLAG_HAIRPINNING))
+            {
+              if (PREDICT_TRUE (sm->num_workers > 1))
+                next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
+              else
+                next0 = SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
+            }
+
+          pkts_processed += next0 != SNAT_HAIRPIN_SRC_NEXT_DROP;
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, snat_hairpin_src_node.index,
+                               SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+                               pkts_processed);
+  return frame->n_vectors;
+}
+
+/*
+ * Registration of the "snat-hairpin-src" graph node.
+ *
+ * Dispositions are indexed by snat_hairpin_next_t.  The error strings are
+ * shared with the in2out nodes.  No trace formatter is registered.
+ */
+VLIB_REGISTER_NODE (snat_hairpin_src_node) = {
+  .function = snat_hairpin_src_fn,
+  .name = "snat-hairpin-src",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+  .error_strings = snat_in2out_error_strings,
+  .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT,
+  /* Listed out of enum order; designated initializers make that harmless. */
+  .next_nodes = {
+     [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
+     [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "snat-in2out-output",
+     [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
+     [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "snat-in2out-output-worker-handoff",
+  },
+};
+
+/* Multiarch wrapper for the node function registered above. */
+VLIB_NODE_FUNCTION_MULTIARCH (snat_hairpin_src_node,
+                              snat_hairpin_src_fn);
 
 static uword
 snat_in2out_fast_static_map_fn (vlib_main_t * vm,
@@ -2480,7 +3343,7 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm,
           key0.port = udp0->src_port;
           key0.fib_index = rx_fib_index0;
 
-          if (snat_static_mapping_match(sm, key0, &sm0, 0))
+          if (snat_static_mapping_match(sm, key0, &sm0, 0, 0))
             {
               b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
               next0= SNAT_IN2OUT_NEXT_DROP;