NAT: Destination NAT44 with load-balancing (VPP-954) 96/8296/2
authorMatus Fabian <matfabia@cisco.com>
Mon, 4 Sep 2017 09:17:18 +0000 (02:17 -0700)
committerOle Trøan <otroan@employees.org>
Wed, 6 Sep 2017 08:49:25 +0000 (08:49 +0000)
added load-balancing static mappings with unequal load support

Change-Id: Ie505e41f24d46f812b94dd28bdafe3dc170a6060
Signed-off-by: Matus Fabian <matfabia@cisco.com>
src/plugins/nat/in2out.c
src/plugins/nat/nat.api
src/plugins/nat/nat.c
src/plugins/nat/nat.h
src/plugins/nat/nat_api.c
src/plugins/nat/out2in.c
test/test_nat.py
test/vpp_papi_provider.py

index bb18639..c51d4fb 100644 (file)
@@ -314,23 +314,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
       if (snat_is_unk_proto_session (s))
         {
           clib_bihash_kv_16_8_t up_kv;
-          snat_unk_proto_ses_key_t key;
+          nat_ed_ses_key_t key;
 
           /* Remove from lookup tables */
           key.l_addr = s->in2out.addr;
           key.r_addr = s->ext_host_addr;
           key.fib_index = s->in2out.fib_index;
           key.proto = s->in2out.port;
+          key.rsvd = 0;
+          key.l_port = 0;
           up_kv.key[0] = key.as_u64[0];
           up_kv.key[1] = key.as_u64[1];
-          if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0))
+          if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0))
             clib_warning ("in2out key del failed");
 
           key.l_addr = s->out2in.addr;
           key.fib_index = s->out2in.fib_index;
           up_kv.key[0] = key.as_u64[0];
           up_kv.key[1] = key.as_u64[1];
-          if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0))
+          if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0))
             clib_warning ("out2in key del failed");
         }
       else
@@ -1033,7 +1035,7 @@ snat_hairpinning_unknown_proto (snat_main_t *sm,
   u32 old_addr, new_addr = 0, ti = 0;
   clib_bihash_kv_8_8_t kv, value;
   clib_bihash_kv_16_8_t s_kv, s_value;
-  snat_unk_proto_ses_key_t key;
+  nat_ed_ses_key_t key;
   snat_session_key_t m_key;
   snat_worker_key_t w_key;
   snat_static_mapping_t *m;
@@ -1045,10 +1047,11 @@ snat_hairpinning_unknown_proto (snat_main_t *sm,
   key.r_addr.as_u32 = ip->src_address.as_u32;
   key.fib_index = sm->outside_fib_index;
   key.proto = ip->protocol;
-  key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
+  key.rsvd = 0;
+  key.l_port = 0;
   s_kv.key[0] = key.as_u64[0];
   s_kv.key[1] = key.as_u64[1];
-  if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+  if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
     {
       m_key.addr = ip->dst_address;
       m_key.fib_index = sm->outside_fib_index;
@@ -1110,7 +1113,7 @@ snat_in2out_unknown_proto (snat_main_t *sm,
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
   u32 elt_index, head_index, ses_index, oldest_index;
   snat_session_t * s;
-  snat_unk_proto_ses_key_t key;
+  nat_ed_ses_key_t key;
   u32 address_index = ~0;
   int i;
   u8 is_sm = 0;
@@ -1121,11 +1124,12 @@ snat_in2out_unknown_proto (snat_main_t *sm,
   key.r_addr = ip->dst_address;
   key.fib_index = rx_fib_index;
   key.proto = ip->protocol;
-  key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
+  key.rsvd = 0;
+  key.l_port = 0;
   s_kv.key[0] = key.as_u64[0];
   s_kv.key[1] = key.as_u64[1];
 
-  if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value))
+  if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
     {
       s = pool_elt_at_index (tsm->sessions, s_value.value);
       new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
@@ -1202,7 +1206,7 @@ snat_in2out_unknown_proto (snat_main_t *sm,
                       key.l_addr.as_u32 = new_addr;
                       s_kv.key[0] = key.as_u64[0];
                       s_kv.key[1] = key.as_u64[1];
-                      if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+                      if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
                         break;
 
                       goto create_ses;
@@ -1215,7 +1219,7 @@ snat_in2out_unknown_proto (snat_main_t *sm,
               key.l_addr.as_u32 = sm->addresses[i].addr.as_u32;
               s_kv.key[0] = key.as_u64[0];
               s_kv.key[1] = key.as_u64[1];
-              if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+              if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
                 {
                   new_addr = ip->src_address.as_u32 = key.l_addr.as_u32;
                   address_index = i;
@@ -1259,14 +1263,14 @@ create_ses:
               key.proto = s->in2out.port;
               s_kv.key[0] = key.as_u64[0];
               s_kv.key[1] = key.as_u64[1];
-              if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0))
+              if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0))
                 clib_warning ("in2out key del failed");
 
               key.l_addr = s->out2in.addr;
               key.fib_index = s->out2in.fib_index;
               s_kv.key[0] = key.as_u64[0];
               s_kv.key[1] = key.as_u64[1];
-              if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0))
+              if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0))
                 clib_warning ("out2in key del failed");
             }
           else
@@ -1333,14 +1337,14 @@ create_ses:
       s_kv.key[0] = key.as_u64[0];
       s_kv.key[1] = key.as_u64[1];
       s_kv.value = s - tsm->sessions;
-      if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
+      if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
         clib_warning ("in2out key add failed");
 
       key.l_addr.as_u32 = new_addr;
       key.fib_index = sm->outside_fib_index;
       s_kv.key[0] = key.as_u64[0];
       s_kv.key[1] = key.as_u64[1];
-      if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
+      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
         clib_warning ("out2in key add failed");
   }
 
@@ -1366,6 +1370,153 @@ create_ses:
     vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
 }
 
+static void
+snat_in2out_lb (snat_main_t *sm,
+                vlib_buffer_t * b,
+                ip4_header_t * ip,
+                u32 rx_fib_index,
+                u32 thread_index,
+                f64 now,
+                vlib_main_t * vm)
+{
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t s_kv, s_value;
+  udp_header_t *udp = ip4_next_header (ip);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  snat_session_t *s = 0;
+  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+  u32 old_addr, new_addr;
+  u16 new_port, old_port;
+  ip_csum_t sum;
+  u32 proto = ip_proto_to_snat_proto (ip->protocol);
+  snat_session_key_t e_key, l_key;
+  clib_bihash_kv_8_8_t kv, value;
+  snat_user_key_t u_key;
+  snat_user_t *u;
+  dlist_elt_t *head, *elt;
+
+  old_addr = ip->src_address.as_u32;
+
+  key.l_addr = ip->src_address;
+  key.r_addr = ip->dst_address;
+  key.fib_index = rx_fib_index;
+  key.proto = ip->protocol;
+  key.rsvd = 0;
+  key.l_port = udp->src_port;
+  s_kv.key[0] = key.as_u64[0];
+  s_kv.key[1] = key.as_u64[1];
+
+  if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value))
+    {
+      s = pool_elt_at_index (tsm->sessions, s_value.value);
+    }
+  else
+    {
+      l_key.addr = ip->src_address;
+      l_key.port = udp->src_port;
+      l_key.protocol = proto;
+      l_key.fib_index = rx_fib_index;
+      if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0))
+        return;
+
+      u_key.addr = ip->src_address;
+      u_key.fib_index = rx_fib_index;
+      kv.key = u_key.as_u64;
+
+      /* Ever heard of the "user" = src ip4 address before? */
+      if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+        {
+          /* no, make a new one */
+          pool_get (tsm->users, u);
+          memset (u, 0, sizeof (*u));
+          u->addr = ip->src_address;
+          u->fib_index = rx_fib_index;
+
+          pool_get (tsm->list_pool, head);
+          u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+          clib_dlist_init (tsm->list_pool,
+                           u->sessions_per_user_list_head_index);
+
+          kv.value = u - tsm->users;
+
+          /* add user */
+          if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1))
+            clib_warning ("user key add failed");
+        }
+      else
+        {
+          u = pool_elt_at_index (tsm->users, value.value);
+        }
+
+      /* Create a new session */
+      pool_get (tsm->sessions, s);
+      memset (s, 0, sizeof (*s));
+
+      s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
+      s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+      s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
+      s->outside_address_index = ~0;
+      s->in2out = l_key;
+      s->out2in = e_key;
+      u->nstaticsessions++;
+
+      /* Create list elts */
+      pool_get (tsm->list_pool, elt);
+      clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+      elt->value = s - tsm->sessions;
+      s->per_user_index = elt - tsm->list_pool;
+      s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+      clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+                          s->per_user_index);
+
+      /* Add to lookup tables */
+      s_kv.value = s - tsm->sessions;
+      if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+        clib_warning ("in2out-ed key add failed");
+
+      key.l_addr = e_key.addr;
+      key.fib_index = e_key.fib_index;
+      key.l_port = e_key.port;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
+      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+        clib_warning ("out2in-ed key add failed");
+    }
+
+  new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32;
+
+  /* Update IP checksum */
+  sum = ip->checksum;
+  sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+  ip->checksum = ip_csum_fold (sum);
+
+  if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
+    {
+      old_port = tcp->src_port;
+      tcp->src_port = s->out2in.port;
+      new_port = tcp->src_port;
+
+      sum = tcp->checksum;
+      sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address);
+      sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
+      tcp->checksum = ip_csum_fold(sum);
+    }
+  else
+    {
+      udp->src_port = s->out2in.port;
+      udp->checksum = 0;
+    }
+
+  if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0)
+    vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index;
+
+  /* Accounting */
+  s->last_heard = now;
+  s->total_pkts++;
+  s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+}
+
 static inline uword
 snat_in2out_node_fn_inline (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
@@ -1521,8 +1672,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value0.value);
+            {
+              if (PREDICT_FALSE (value0.value == ~0ULL))
+                {
+                  if (is_slow_path)
+                    {
+                      snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index,
+                                     now, vm);
+                      goto trace00;
+                    }
+                  else
+                    {
+                      next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+                      goto trace00;
+                    }
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value0.value);
+                }
+            }
 
           old_addr0 = ip0->src_address.as_u32;
           ip0->src_address = s0->out2in.addr;
@@ -1672,8 +1843,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value1.value);
+            {
+              if (PREDICT_FALSE (value1.value == ~0ULL))
+                {
+                  if (is_slow_path)
+                    {
+                      snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index,
+                                     now, vm);
+                      goto trace01;
+                    }
+                  else
+                    {
+                      next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+                      goto trace01;
+                    }
+                }
+              else
+                {
+                  s1 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value1.value);
+                }
+            }
 
           old_addr1 = ip1->src_address.as_u32;
           ip1->src_address = s1->out2in.addr;
@@ -1860,8 +2051,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value0.value);
+            {
+              if (PREDICT_FALSE (value0.value == ~0ULL))
+                {
+                  if (is_slow_path)
+                    {
+                      snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index,
+                                     now, vm);
+                      goto trace0;
+                    }
+                  else
+                    {
+                      next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+                      goto trace0;
+                    }
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value0.value);
+                }
+            }
 
           old_addr0 = ip0->src_address.as_u32;
           ip0->src_address = s0->out2in.addr;
index 7245cb0..d7a4a9e 100644 (file)
@@ -1025,6 +1025,39 @@ define nat44_user_session_details {
   u32 total_pkts;
 };
 
+typeonly manual_endian define nat44_lb_addr_port {
+  u8 addr[4];
+  u16 port;
+  u8 probability;
+};
+
+autoreply manual_endian define nat44_add_del_lb_static_mapping {
+  u32 client_index;
+  u32 context;
+  u8 is_add;
+  u8 external_addr[4];
+  u16 external_port;
+  u8 protocol;
+  u32 vrf_id;
+  u8 local_num;
+  vl_api_nat44_lb_addr_port_t locals[local_num];
+};
+
+define nat44_lb_static_mapping_dump {
+  u32 client_index;
+  u32 context;
+};
+
+manual_endian define nat44_lb_static_mapping_details {
+  u32 context;
+  u8 external_addr[4];
+  u16 external_port;
+  u8 protocol;
+  u32 vrf_id;
+  u8 local_num;
+  vl_api_nat44_lb_addr_port_t locals[local_num];
+};
+
 /*
  * Deterministic NAT (CGN) APIs
  */
index f9ecb94..fabd0bc 100644 (file)
@@ -506,15 +506,16 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                       if (snat_is_unk_proto_session (s))
                         {
                           clib_bihash_kv_16_8_t up_kv;
-                          snat_unk_proto_ses_key_t up_key;
+                          nat_ed_ses_key_t up_key;
                           up_key.l_addr = s->in2out.addr;
                           up_key.r_addr = s->ext_host_addr;
                           up_key.fib_index = s->in2out.fib_index;
                           up_key.proto = s->in2out.port;
-                          up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0;
+                          up_key.rsvd = 0;
+                          up_key.l_port = 0;
                           up_kv.key[0] = up_key.as_u64[0];
                           up_kv.key[1] = up_key.as_u64[1];
-                          if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto,
+                          if (clib_bihash_add_del_16_8 (&sm->in2out_ed,
                                                         &up_kv, 0))
                             clib_warning ("in2out key del failed");
 
@@ -522,7 +523,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
                           up_key.fib_index = s->out2in.fib_index;
                           up_kv.key[0] = up_key.as_u64[0];
                           up_kv.key[1] = up_key.as_u64[1];
-                          if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto,
+                          if (clib_bihash_add_del_16_8 (&sm->out2in_ed,
                                                         &up_kv, 0))
                             clib_warning ("out2in key del failed");
 
@@ -589,6 +590,266 @@ delete:
   return 0;
 }
 
+/**
+ * Add or delete a NAT44 load-balancing static mapping.
+ *
+ * One external address:port is mapped to several local address:port
+ * back-ends.  Each back-end's "prefix" is set to the running sum of the
+ * probabilities up to and including it, and the completed entries are
+ * stored in m->locals.
+ *
+ * @param e_addr  external IPv4 address
+ * @param e_port  external port
+ * @param proto   NAT protocol of the mapping
+ * @param vrf_id  table ID used to find/create the FIB for the locals
+ * @param locals  vector of back-ends (at least two when adding); the
+ *                prefix field of each element is overwritten
+ * @param is_add  non-zero to add the mapping, zero to delete it
+ *
+ * @return 0 on success, otherwise a VNET_API_ERROR_* code
+ */
+int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
+                                     snat_protocol_t proto, u32 vrf_id,
+                                     nat44_lb_addr_port_t *locals, u8 is_add)
+{
+  snat_main_t * sm = &snat_main;
+  snat_static_mapping_t *m;
+  snat_session_key_t m_key;
+  clib_bihash_kv_8_8_t kv, value;
+  u32 fib_index;
+  snat_address_t *a = 0;
+  int i;
+  nat44_lb_addr_port_t *local;
+  snat_user_key_t w_key0;
+  snat_worker_key_t w_key1;
+  u32 worker_index = 0;
+
+  m_key.addr = e_addr;
+  m_key.port = e_port;
+  m_key.protocol = proto;
+  m_key.fib_index = sm->outside_fib_index;
+  kv.key = m_key.as_u64;
+  if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+    m = 0;
+  else
+    m = pool_elt_at_index (sm->static_mappings, value.value);
+
+  if (is_add)
+    {
+      if (m)
+        return VNET_API_ERROR_VALUE_EXIST;
+
+      if (vec_len (locals) < 2)
+        return VNET_API_ERROR_INVALID_VALUE;
+
+      fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+                                                     vrf_id);
+
+      /* Find external address in allocated addresses and reserve port for
+         address and port pair mapping when dynamic translations enabled */
+      if (!sm->static_mapping_only)
+        {
+          for (i = 0; i < vec_len (sm->addresses); i++)
+            {
+              if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+                {
+                  a = sm->addresses + i;
+                  /* External port must be unused */
+                  switch (proto)
+                    {
+#define _(N, j, n, s) \
+                    case SNAT_PROTOCOL_##N: \
+                      if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \
+                        return VNET_API_ERROR_INVALID_VALUE; \
+                      clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \
+                      if (e_port > 1024) \
+                        a->busy_##n##_ports++; \
+                      break;
+                      foreach_snat_protocol
+#undef _
+                    default:
+                      clib_warning("unknown_protocol");
+                      return VNET_API_ERROR_INVALID_VALUE_2;
+                    }
+                  break;
+                }
+            }
+          /* External address must be allocated */
+          if (!a)
+            return VNET_API_ERROR_NO_SUCH_ENTRY;
+        }
+
+      pool_get (sm->static_mappings, m);
+      memset (m, 0, sizeof (*m));
+      m->external_addr = e_addr;
+      m->addr_only = 0;
+      m->vrf_id = vrf_id;
+      m->fib_index = fib_index;
+      m->external_port = e_port;
+      m->proto = proto;
+
+      m_key.addr = m->external_addr;
+      m_key.port = m->external_port;
+      m_key.protocol = m->proto;
+      m_key.fib_index = sm->outside_fib_index;
+      kv.key = m_key.as_u64;
+      kv.value = m - sm->static_mappings;
+      if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1))
+        {
+          clib_warning ("static_mapping_by_external key add failed");
+          return VNET_API_ERROR_UNSPECIFIED;
+        }
+      m_key.port = clib_host_to_net_u16 (m->external_port);
+      kv.key = m_key.as_u64;
+      kv.value = ~0ULL;
+      if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1))
+        {
+          clib_warning ("static_mapping_by_local key add failed");
+          return VNET_API_ERROR_UNSPECIFIED;
+        }
+
+      m_key.fib_index = m->fib_index;
+
+      /* Assign worker */
+      if (sm->workers)
+        {
+          w_key0.addr = locals[0].addr;
+          w_key0.fib_index = fib_index;
+          kv.key = w_key0.as_u64;
+
+          if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
+            worker_index = sm->first_worker_index +
+              sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+          else
+            worker_index = value.value;
+
+          w_key1.addr = m->external_addr;
+          w_key1.port = clib_host_to_net_u16 (m->external_port);
+          w_key1.fib_index = sm->outside_fib_index;
+          kv.key = w_key1.as_u64;
+          kv.value = worker_index;
+          if (clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1))
+            {
+              clib_warning ("worker-by-out add key failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+        }
+
+      for (i = 0; i < vec_len (locals); i++)
+        {
+          m_key.addr = locals[i].addr;
+          m_key.port = locals[i].port;
+          kv.key = m_key.as_u64;
+          kv.value = m - sm->static_mappings;
+          clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
+          /* Running sum of probabilities; the first entry has no
+             predecessor to accumulate from. */
+          locals[i].prefix = (i == 0) ? locals[i].probability :
+            (locals[i - 1].prefix + locals[i].probability);
+          vec_add1 (m->locals, locals[i]);
+          m_key.port = clib_host_to_net_u16 (locals[i].port);
+          kv.key = m_key.as_u64;
+          kv.value = ~0ULL;
+          if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1))
+            {
+              clib_warning ("in2out key add failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+          /* Assign worker */
+          if (sm->workers)
+            {
+              w_key0.addr = locals[i].addr;
+              w_key0.fib_index = fib_index;
+              kv.key = w_key0.as_u64;
+              kv.value = worker_index;
+              if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1))
+                {
+                  clib_warning ("worker-by-in key add failed");
+                  return VNET_API_ERROR_UNSPECIFIED;
+                }
+            }
+        }
+    }
+  else
+    {
+      if (!m)
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+      fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4);
+
+      /* Free external address port */
+      if (!sm->static_mapping_only)
+        {
+          for (i = 0; i < vec_len (sm->addresses); i++)
+            {
+              if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+                {
+                  a = sm->addresses + i;
+                  switch (proto)
+                    {
+#define _(N, j, n, s) \
+                    case SNAT_PROTOCOL_##N: \
+                      clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
+                      if (e_port > 1024) \
+                        a->busy_##n##_ports--; \
+                      break;
+                      foreach_snat_protocol
+#undef _
+                    default:
+                      clib_warning("unknown_protocol");
+                      return VNET_API_ERROR_INVALID_VALUE_2;
+                    }
+                  break;
+                }
+            }
+        }
+
+      m_key.addr = m->external_addr;
+      m_key.port = m->external_port;
+      m_key.protocol = m->proto;
+      m_key.fib_index = sm->outside_fib_index;
+      kv.key = m_key.as_u64;
+      if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0))
+        {
+          clib_warning ("static_mapping_by_external key del failed");
+          return VNET_API_ERROR_UNSPECIFIED;
+        }
+      m_key.port = clib_host_to_net_u16 (m->external_port);
+      kv.key = m_key.as_u64;
+      if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0))
+        {
+          clib_warning ("outi2in key del failed");
+          return VNET_API_ERROR_UNSPECIFIED;
+        }
+
+      vec_foreach (local, m->locals)
+        {
+          m_key.addr = local->addr;
+          m_key.port = local->port;
+          m_key.fib_index = m->fib_index;
+          kv.key = m_key.as_u64;
+          if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0))
+            {
+              clib_warning ("static_mapping_by_local key del failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+          m_key.port = clib_host_to_net_u16 (local->port);
+          kv.key = m_key.as_u64;
+          if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0))
+            {
+              clib_warning ("in2out key del failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+        }
+
+      /* Release the back-end vector before returning m to the pool */
+      vec_free (m->locals);
+      pool_put (sm->static_mappings, m);
+    }
+
+  return 0;
+}
+
 int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
 {
   snat_address_t *a = 0;
@@ -649,15 +887,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
                 if (snat_is_unk_proto_session (ses))
                   {
                     clib_bihash_kv_16_8_t up_kv;
-                    snat_unk_proto_ses_key_t up_key;
+                    nat_ed_ses_key_t up_key;
                     up_key.l_addr = ses->in2out.addr;
                     up_key.r_addr = ses->ext_host_addr;
                     up_key.fib_index = ses->in2out.fib_index;
                     up_key.proto = ses->in2out.port;
-                    up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0;
+                    up_key.rsvd = 0;
+                    up_key.l_port = 0;
                     up_kv.key[0] = up_key.as_u64[0];
                     up_kv.key[1] = up_key.as_u64[1];
-                    if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto,
+                    if (clib_bihash_add_del_16_8 (&sm->in2out_ed,
                                                   &up_kv, 0))
                       clib_warning ("in2out key del failed");
 
@@ -665,7 +904,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm)
                     up_key.fib_index = ses->out2in.fib_index;
                     up_kv.key[0] = up_key.as_u64[0];
                     up_kv.key[1] = up_key.as_u64[1];
-                    if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto,
+                    if (clib_bihash_add_del_16_8 (&sm->out2in_ed,
                                                   &up_kv, 0))
                       clib_warning ("out2in key del failed");
                   }
@@ -1048,6 +1287,7 @@ int snat_static_mapping_match (snat_main_t * sm,
   snat_static_mapping_t *m;
   snat_session_key_t m_key;
   clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
+  u32 rand, lo = 0, hi, mid;
 
   if (by_external)
     mapping_hash = &sm->static_mapping_by_external;
@@ -1073,11 +1313,32 @@ int snat_static_mapping_match (snat_main_t * sm,
 
   if (by_external)
     {
-      mapping->addr = m->local_addr;
-      /* Address only mapping doesn't change port */
-      mapping->port = m->addr_only ? match.port
-        : clib_host_to_net_u16 (m->local_port);
+      if (vec_len (m->locals))
+        {
+          /* Load-balancing mapping: draw rand in [1, m->locals[hi].prefix]
+             and binary-search the prefix values for the first back-end
+             whose prefix is >= rand. */
+          hi = vec_len (m->locals) - 1;
+          rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
+          while (lo < hi)
+            {
+              mid = ((hi - lo) >> 1) + lo;
+              (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid);
+            }
+          if (!(m->locals[lo].prefix >= rand))
+            return 1;
+          mapping->addr = m->locals[lo].addr;
+          mapping->port = clib_host_to_net_u16 (m->locals[lo].port);
+        }
+      else
+        {
+          mapping->addr = m->local_addr;
+          /* Address only mapping doesn't change port */
+          mapping->port = m->addr_only ? match.port
+            : clib_host_to_net_u16 (m->local_port);
+        }
       mapping->fib_index = m->fib_index;
+      mapping->protocol = m->proto;
     }
   else
     {
@@ -1517,6 +1775,101 @@ VLIB_CLI_COMMAND (add_static_mapping_command, static) = {
     "nat44 add static mapping tcp|udp|icmp local <addr> [<port>] external <addr> [<port>] [vrf <table-id>] [del]",
 };
 
+static clib_error_t *
+add_lb_static_mapping_command_fn (vlib_main_t * vm,
+                                  unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t * error = 0;
+  ip4_address_t l_addr, e_addr;
+  u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
+  int is_add = 1;
+  int rv;
+  snat_protocol_t proto;
+  u8 proto_set = 0;
+  nat44_lb_addr_port_t *locals = 0, local;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "local %U:%u probability %u",
+                    unformat_ip4_address, &l_addr, &l_port, &probability))
+        {
+          memset (&local, 0, sizeof (local));
+          local.addr = l_addr;
+          local.port = (u16) l_port;
+          local.probability = (u8) probability;
+          vec_add1 (locals, local);
+        }
+      else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
+                         &e_addr, &e_port))
+        ;
+      else if (unformat (line_input, "vrf %u", &vrf_id))
+        ;
+      else if (unformat (line_input, "protocol %U", unformat_snat_protocol,
+                         &proto))
+        proto_set = 1;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else
+        {
+          error = clib_error_return (0, "unknown input: '%U'",
+            format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (vec_len (locals) < 2)
+    {
+      error = clib_error_return (0, "at least two local must be set");
+      goto done;
+    }
+
+  if (!proto_set)
+    {
+      error = clib_error_return (0, "missing protocol");
+      goto done;
+    }
+
+  rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, vrf_id,
+                                        locals, is_add);
+
+  switch (rv)
+    {
+    case VNET_API_ERROR_INVALID_VALUE:
+      error = clib_error_return (0, "External port already in use.");
+      goto done;
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      if (is_add)
+        error = clib_error_return (0, "External addres must be allocated.");
+      else
+        error = clib_error_return (0, "Mapping not exist.");
+      goto done;
+    case VNET_API_ERROR_VALUE_EXIST:
+      error = clib_error_return (0, "Mapping already exist.");
+      goto done;
+    default:
+      break;
+    }
+
+done:
+  unformat_free (line_input);
+  vec_free (locals);
+
+  return error;
+}
+
+VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = {
+  .path = "nat44 add load-balancing static mapping",
+  .function = add_lb_static_mapping_command_fn,
+  .short_help =
+    "nat44 add load-balancing static mapping protocol tcp|udp external <addr>:<port> local <addr>:<port> probability <n> [vrf <table-id>] [del]",
+};
+
 static clib_error_t *
 set_workers_command_fn (vlib_main_t * vm,
                         unformat_input_t * input,
@@ -1839,10 +2192,10 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
           clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets,
                                 user_memory_size);
 
-          clib_bihash_init_16_8 (&sm->in2out_unk_proto, "in2out-unk-proto",
+          clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed",
                                  translation_buckets, translation_memory_size);
 
-          clib_bihash_init_16_8 (&sm->out2in_unk_proto, "out2in-unk-proto",
+          clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed",
                                  translation_buckets, translation_memory_size);
         }
       else
@@ -1884,18 +2237,10 @@ u8 * format_snat_session_state (u8 * s, va_list * args)
 u8 * format_snat_key (u8 * s, va_list * args)
 {
   snat_session_key_t * key = va_arg (*args, snat_session_key_t *);
-  char * protocol_string = "unknown";
-  static char *protocol_strings[] = {
-      "UDP",
-      "TCP",
-      "ICMP",
-  };
-
-  if (key->protocol < ARRAY_LEN(protocol_strings))
-      protocol_string = protocol_strings[key->protocol];
 
-  s = format (s, "%U proto %s port %d fib %d",
-              format_ip4_address, &key->addr, protocol_string,
+  s = format (s, "%U proto %U port %d fib %d",
+              format_ip4_address, &key->addr,
+              format_snat_protocol, key->protocol,
               clib_net_to_host_u16 (key->port), key->fib_index);
   return s;
 }
@@ -1919,6 +2264,9 @@ u8 * format_snat_session (u8 * s, va_list * args)
       s = format (s, "  i2o %U\n", format_snat_key, &sess->in2out);
       s = format (s, "    o2i %U\n", format_snat_key, &sess->out2in);
     }
+  if (sess->ext_host_addr.as_u32)
+      s = format (s, "       external host %U\n",
+                  format_ip4_address, &sess->ext_host_addr);
   s = format (s, "       last heard %.2f\n", sess->last_heard);
   s = format (s, "       total pkts %d, total bytes %lld\n",
               sess->total_pkts, sess->total_bytes);
@@ -1926,6 +2274,8 @@ u8 * format_snat_session (u8 * s, va_list * args)
     s = format (s, "       static translation\n");
   else
     s = format (s, "       dynamic translation\n");
+  if (sess->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)
+    s = format (s, "       load-balancing\n");
 
   return s;
 }
@@ -1973,6 +2323,7 @@ u8 * format_snat_user (u8 * s, va_list * args)
 u8 * format_snat_static_mapping (u8 * s, va_list * args)
 {
   snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *);
+  nat44_lb_addr_port_t *local;
 
   if (m->addr_only)
       s = format (s, "local %U external %U vrf %d",
@@ -1980,12 +2331,25 @@ u8 * format_snat_static_mapping (u8 * s, va_list * args)
                   format_ip4_address, &m->external_addr,
                   m->vrf_id);
   else
-      s = format (s, "%U local %U:%d external %U:%d vrf %d",
-                  format_snat_protocol, m->proto,
-                  format_ip4_address, &m->local_addr, m->local_port,
-                  format_ip4_address, &m->external_addr, m->external_port,
-                  m->vrf_id);
-
+   {
+      if (vec_len (m->locals))
+        {
+          s = format (s, "%U vrf %d external %U:%d",
+                      format_snat_protocol, m->proto,
+                      m->vrf_id,
+                      format_ip4_address, &m->external_addr, m->external_port);
+          vec_foreach (local, m->locals)
+            s = format (s, "\n  local %U:%d probability %d\%",
+                        format_ip4_address, &local->addr, local->port,
+                        local->probability);
+        }
+      else
+        s = format (s, "%U local %U:%d external %U:%d vrf %d",
+                    format_snat_protocol, m->proto,
+                    format_ip4_address, &m->local_addr, m->local_port,
+                    format_ip4_address, &m->external_addr, m->external_port,
+                    m->vrf_id);
+   }
   return s;
 }
 
@@ -2208,6 +2572,10 @@ show_snat_command_fn (vlib_main_t * vm,
                                verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in,
                                verbose - 1);
+              vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed,
+                               verbose - 1);
+              vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed,
+                               verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in,
                                verbose - 1);
               vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out,
index 04c466d..8935144 100644 (file)
@@ -62,12 +62,13 @@ typedef struct {
       ip4_address_t l_addr;
       ip4_address_t r_addr;
       u32 fib_index;
+      u16 l_port;
       u8 proto;
-      u8 rsvd[3];
+      u8 rsvd;
     };
     u64 as_u64[2];
   };
-} snat_unk_proto_ses_key_t;
+} nat_ed_ses_key_t;
 
 typedef struct {
   union
@@ -139,6 +140,7 @@ typedef enum {
 
 #define SNAT_SESSION_FLAG_STATIC_MAPPING 1
 #define SNAT_SESSION_FLAG_UNKNOWN_PROTO  2
+#define SNAT_SESSION_FLAG_LOAD_BALANCING 4
 
 typedef CLIB_PACKED(struct {
   snat_session_key_t out2in;    /* 0-15 */
@@ -205,6 +207,13 @@ typedef struct {
   snat_det_session_t * sessions;
 } snat_det_map_t;
 
+/* One local endpoint of a load-balancing static mapping
+ * (element type of snat_static_mapping_t.locals). */
+typedef struct {
+  /* Local IPv4 address. */
+  ip4_address_t addr;
+  /* Local port; the API path stores it via clib_net_to_host_u16(). */
+  u16 port;
+  /* Weight of this endpoint; printed with a trailing '%' when the mapping
+   * is formatted. */
+  u8 probability;
+  /* NOTE(review): not referenced by the code visible in this change --
+   * confirm how it is filled in and used. */
+  u8 prefix;
+} nat44_lb_addr_port_t;
+
 typedef struct {
   ip4_address_t local_addr;
   ip4_address_t external_addr;
@@ -214,6 +223,7 @@ typedef struct {
   u32 vrf_id;
   u32 fib_index;
   snat_protocol_t proto;
+  nat44_lb_addr_port_t *locals;
 } snat_static_mapping_t;
 
 typedef struct {
@@ -264,9 +274,9 @@ typedef struct snat_main_s {
   clib_bihash_8_8_t out2in;
   clib_bihash_8_8_t in2out;
 
-  /* Unknown protocol sessions lookup tables */
-  clib_bihash_16_8_t out2in_unk_proto;
-  clib_bihash_16_8_t in2out_unk_proto;
+  /* Endpoint address dependent sessions lookup tables */
+  clib_bihash_16_8_t out2in_ed;
+  clib_bihash_16_8_t in2out_ed;
 
   /* Find-a-user => src address lookup */
   clib_bihash_8_8_t user_hash;
@@ -496,6 +506,9 @@ int snat_interface_add_del_output_feature(u32 sw_if_index, u8 is_inside,
 int snat_add_interface_address(snat_main_t *sm, u32 sw_if_index, int is_del);
 uword unformat_snat_protocol(unformat_input_t * input, va_list * args);
 u8 * format_snat_protocol(u8 * s, va_list * args);
+int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
+                                     snat_protocol_t proto, u32 vrf_id,
+                                     nat44_lb_addr_port_t *locals, u8 is_add);
 
 static_always_inline u8
 icmp_is_error_message (icmp46_header_t * icmp)
index 0a2141f..fa20f2c 100644 (file)
 #include <nat/nat_msg_enum.h>
 #include <vnet/fib/fib_table.h>
 
+/* Alias the *_endian names of the load-balancing API types to
+ * vl_noop_handler (presumably so no byte-swap routine is generated for
+ * them -- confirm against the API generator). */
+#define vl_api_nat44_lb_addr_port_t_endian vl_noop_handler
+#define vl_api_nat44_add_del_lb_static_mapping_t_endian vl_noop_handler
+/* NOTE(review): the name below contains "nat44_nat44_", while the details
+ * message type used in this file is vl_api_nat44_lb_static_mapping_details_t.
+ * This looks like a typo that leaves the define without effect -- confirm
+ * before renaming. */
+#define vl_api_nat44_nat44_lb_static_mapping_details_t_endian vl_noop_handler
+
 /* define message structures */
 #define vl_typedefs
 #include <nat/nat_all_api_h.h>
@@ -465,7 +469,8 @@ static void
   /* *INDENT-OFF* */
   pool_foreach (m, sm->static_mappings,
   ({
-      send_snat_static_mapping_details (m, q, mp->context);
+      if (!vec_len(m->locals))
+        send_snat_static_mapping_details (m, q, mp->context);
   }));
   /* *INDENT-ON* */
 
@@ -1888,7 +1893,8 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
   /* *INDENT-OFF* */
   pool_foreach (m, sm->static_mappings,
   ({
-      send_nat44_static_mapping_details (m, q, mp->context);
+      if (!vec_len(m->locals))
+        send_nat44_static_mapping_details (m, q, mp->context);
   }));
   /* *INDENT-ON* */
 
@@ -2136,6 +2142,131 @@ vl_api_nat44_user_session_dump_t_print (vl_api_nat44_user_session_dump_t * mp,
   FINISH;
 }
 
+/*
+ * Convert an array of API-encoded local address/port entries into a newly
+ * built vector of nat44_lb_addr_port_t.
+ *
+ * addr_port_pairs    - entries as carried in the API message
+ * addr_port_pair_num - number of entries to convert
+ *
+ * Returns the new vector, or 0 when no entry was converted.  The vector is
+ * owned by the caller.
+ */
+static nat44_lb_addr_port_t *
+unformat_nat44_lb_addr_port (vl_api_nat44_lb_addr_port_t * addr_port_pairs,
+                            u8 addr_port_pair_num)
+{
+  nat44_lb_addr_port_t *converted = 0;
+  u32 idx = 0;
+
+  while (idx < addr_port_pair_num)
+    {
+      vl_api_nat44_lb_addr_port_t *src = addr_port_pairs + idx;
+      nat44_lb_addr_port_t entry;
+
+      /* Start from an all-zero entry so fields not assigned below stay 0. */
+      memset (&entry, 0, sizeof (entry));
+      entry.probability = src->probability;
+      /* The port arrives in network byte order. */
+      entry.port = clib_net_to_host_u16 (src->port);
+      clib_memcpy (&entry.addr, src->addr, 4);
+
+      vec_add1 (converted, entry);
+      idx++;
+    }
+
+  return converted;
+}
+
+/*
+ * API handler for NAT44_ADD_DEL_LB_STATIC_MAPPING.
+ *
+ * Decodes the request (external address/port, protocol, VRF id and the list
+ * of local endpoints), calls nat44_add_del_lb_static_mapping() and sends the
+ * reply through REPLY_MACRO.
+ */
+static void
+  vl_api_nat44_add_del_lb_static_mapping_t_handler
+  (vl_api_nat44_add_del_lb_static_mapping_t * mp)
+{
+  /* sm and rmp are not referenced directly below; presumably they are
+   * consumed by REPLY_MACRO together with rv -- confirm. */
+  snat_main_t *sm = &snat_main;
+  vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp;
+  int rv = 0;
+  nat44_lb_addr_port_t *locals = 0;
+  ip4_address_t e_addr;
+  snat_protocol_t proto;
+
+  /* NOTE(review): mp->local_num is taken from the message as-is; no bound
+   * check against the message length is visible here. */
+  locals = unformat_nat44_lb_addr_port (mp->locals, mp->local_num);
+  clib_memcpy (&e_addr, mp->external_addr, 4);
+  proto = ip_proto_to_snat_proto (mp->protocol);
+
+  /* Port and VRF id are converted from network to host byte order. */
+  rv =
+    nat44_add_del_lb_static_mapping (e_addr,
+                                    clib_net_to_host_u16 (mp->external_port),
+                                    proto, clib_net_to_host_u32 (mp->vrf_id),
+                                    locals, mp->is_add);
+
+  /* The temporary vector is released here regardless of rv. */
+  vec_free (locals);
+
+  REPLY_MACRO (VL_API_NAT44_ADD_DEL_LB_STATIC_MAPPING_REPLY);
+}
+
+/*
+ * Build the textual form of a NAT44_ADD_DEL_LB_STATIC_MAPPING request.
+ * Only the is_add field is included in the text; the remaining request
+ * fields are not printed.  The result is handed to the FINISH macro.
+ */
+static void *vl_api_nat44_add_del_lb_static_mapping_t_print
+  (vl_api_nat44_add_del_lb_static_mapping_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat44_add_del_lb_static_mapping ");
+  s = format (s, "is_add %d\n", mp->is_add);
+
+  /* FINISH presumably emits s via handle and returns -- confirm. */
+  FINISH;
+}
+
+/*
+ * Build one NAT44_LB_STATIC_MAPPING_DETAILS message for mapping m and send
+ * it to the client queue q.
+ *
+ * m       - load-balancing static mapping to describe
+ * q       - client input queue the message is sent to
+ * context - context value copied unchanged into the message
+ *
+ * The message carries the external address/port, protocol and VRF id,
+ * followed by one entry per element of m->locals.
+ */
+static void
+send_nat44_lb_static_mapping_details (snat_static_mapping_t * m,
+                                     unix_shared_memory_queue_t * q,
+                                     u32 context)
+{
+  vl_api_nat44_lb_static_mapping_details_t *rmp;
+  snat_main_t *sm = &snat_main;
+  nat44_lb_addr_port_t *ap;
+  vl_api_nat44_lb_addr_port_t *locals;
+  u32 msg_size;
+
+  /* Size the trailing array by the wire element type that is written
+   * below, not by the internal nat44_lb_addr_port_t. */
+  msg_size = sizeof (*rmp) + (vec_len (m->locals) * sizeof (*locals));
+  rmp = vl_msg_api_alloc (msg_size);
+  /* Zero the whole message, including the trailing entries, so no
+   * uninitialized bytes are sent to the client. */
+  memset (rmp, 0, msg_size);
+  rmp->_vl_msg_id =
+    ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base);
+
+  clib_memcpy (rmp->external_addr, &(m->external_addr), 4);
+  rmp->external_port = ntohs (m->external_port);
+  rmp->protocol = snat_proto_to_ip_proto (m->proto);
+  rmp->vrf_id = ntohl (m->vrf_id);
+  rmp->context = context;
+
+  /* Fill one wire entry per local endpoint; local_num counts them. */
+  locals = (vl_api_nat44_lb_addr_port_t *) rmp->locals;
+  vec_foreach (ap, m->locals)
+  {
+    clib_memcpy (locals->addr, &(ap->addr), 4);
+    locals->port = htons (ap->port);
+    locals->probability = ap->probability;
+    locals++;
+    rmp->local_num++;
+  }
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * API handler for NAT44_LB_STATIC_MAPPING_DUMP.
+ *
+ * Walks the static mapping pool and sends a details message for every
+ * mapping that has a non-empty locals vector.  Returns without sending
+ * anything when the requesting client has no input queue.
+ */
+static void
+  vl_api_nat44_lb_static_mapping_dump_t_handler
+  (vl_api_nat44_lb_static_mapping_dump_t * mp)
+{
+  snat_main_t *sm = &snat_main;
+  snat_static_mapping_t *mapping;
+  unix_shared_memory_queue_t *reply_q =
+    vl_api_client_index_to_input_queue (mp->client_index);
+
+  if (!reply_q)
+    return;
+
+  /* *INDENT-OFF* */
+  pool_foreach (mapping, sm->static_mappings,
+  ({
+      if (vec_len(mapping->locals) != 0)
+        send_nat44_lb_static_mapping_details (mapping, reply_q, mp->context);
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Build the textual form of a NAT44_LB_STATIC_MAPPING_DUMP request.
+ * No field of mp is read, so the text is just the command name.  The
+ * result is handed to the FINISH macro.
+ */
+static void *vl_api_nat44_lb_static_mapping_dump_t_print
+  (vl_api_nat44_lb_static_mapping_dump_t * mp, void *handle)
+{
+  u8 *s;
+
+  s = format (0, "SCRIPT: nat44_lb_static_mapping_dump ");
+
+  /* FINISH presumably emits s via handle and returns -- confirm. */
+  FINISH;
+}
+
 /*******************************/
 /*** Deterministic NAT (CGN) ***/
 /*******************************/
@@ -3159,6 +3290,8 @@ _(NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE,                               \
   nat44_interface_add_del_output_feature)                               \
 _(NAT44_INTERFACE_OUTPUT_FEATURE_DUMP,                                  \
   nat44_interface_output_feature_dump)                                  \
+_(NAT44_ADD_DEL_LB_STATIC_MAPPING, nat44_add_del_lb_static_mapping)     \
+_(NAT44_LB_STATIC_MAPPING_DUMP, nat44_lb_static_mapping_dump)           \
 _(NAT_DET_ADD_DEL_MAP, nat_det_add_del_map)                             \
 _(NAT_DET_FORWARD, nat_det_forward)                                     \
 _(NAT_DET_REVERSE, nat_det_reverse)                                     \
index 6795006..55a750e 100644 (file)
@@ -630,7 +630,7 @@ snat_out2in_unknown_proto (snat_main_t *sm,
   snat_session_key_t m_key;
   u32 old_addr, new_addr;
   ip_csum_t sum;
-  snat_unk_proto_ses_key_t key;
+  nat_ed_ses_key_t key;
   snat_session_t * s;
   snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
   snat_user_key_t u_key;
@@ -643,11 +643,12 @@ snat_out2in_unknown_proto (snat_main_t *sm,
   key.r_addr = ip->src_address;
   key.fib_index = rx_fib_index;
   key.proto = ip->protocol;
-  key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0;
+  key.rsvd = 0;
+  key.l_port = 0;
   s_kv.key[0] = key.as_u64[0];
   s_kv.key[1] = key.as_u64[1];
 
-  if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value))
+  if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
     {
       s = pool_elt_at_index (tsm->sessions, s_value.value);
       new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
@@ -721,14 +722,14 @@ snat_out2in_unknown_proto (snat_main_t *sm,
 
       /* Add to lookup tables */
       s_kv.value = s - tsm->sessions;
-      if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1))
+      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
         clib_warning ("out2in key add failed");
 
       key.l_addr = ip->dst_address;
       key.fib_index = m->fib_index;
       s_kv.key[0] = key.as_u64[0];
       s_kv.key[1] = key.as_u64[1];
-      if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1))
+      if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
         clib_warning ("in2out key add failed");
    }
 
@@ -749,6 +750,152 @@ snat_out2in_unknown_proto (snat_main_t *sm,
                       s->per_user_index);
 }
 
+/*
+ * Outside-to-inside translation of one packet that hit a load-balancing
+ * static mapping.
+ *
+ * Looks up (or creates) a session in the out2in_ed table keyed by the
+ * packet's destination address/port, source address, RX FIB index and IP
+ * protocol, then rewrites the destination address and port to the
+ * session's in2out endpoint and updates checksums and session counters.
+ *
+ * sm           - NAT main structure
+ * b            - buffer holding the packet
+ * ip           - IPv4 header of the packet
+ * rx_fib_index - FIB index used in the lookup keys
+ * thread_index - selects the per-thread session/user/list pools
+ * now          - timestamp stored as the session's last_heard
+ * vm           - vlib main, used for the byte count
+ *
+ * Returns nothing; when no static mapping matches a new flow the packet
+ * is left unmodified.
+ */
+static void
+snat_out2in_lb (snat_main_t *sm,
+                vlib_buffer_t * b,
+                ip4_header_t * ip,
+                u32 rx_fib_index,
+                u32 thread_index,
+                f64 now,
+                vlib_main_t * vm)
+{
+  nat_ed_ses_key_t key;
+  clib_bihash_kv_16_8_t s_kv, s_value;
+  /* The L4 header is viewed both as UDP and as TCP; dst_port is read
+   * through the UDP view before the protocol is checked. */
+  udp_header_t *udp = ip4_next_header (ip);
+  tcp_header_t *tcp = (tcp_header_t *) udp;
+  snat_session_t *s = 0;
+  snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
+  snat_session_key_t e_key, l_key;
+  clib_bihash_kv_8_8_t kv, value;
+  u32 old_addr, new_addr;
+  u32 proto = ip_proto_to_snat_proto (ip->protocol);
+  u16 new_port, old_port;
+  ip_csum_t sum;
+  snat_user_key_t u_key;
+  snat_user_t *u;
+  dlist_elt_t *head, *elt;
+
+  old_addr = ip->dst_address.as_u32;
+
+  /* Endpoint-dependent key as seen from the outside. */
+  key.l_addr = ip->dst_address;
+  key.r_addr = ip->src_address;
+  key.fib_index = rx_fib_index;
+  key.proto = ip->protocol;
+  key.rsvd = 0;
+  key.l_port = udp->dst_port;
+  s_kv.key[0] = key.as_u64[0];
+  s_kv.key[1] = key.as_u64[1];
+
+  if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value))
+    {
+      /* Existing session for this flow. */
+      s = pool_elt_at_index (tsm->sessions, s_value.value);
+    }
+  else
+    {
+      /* No session yet: resolve the external endpoint to a local one. */
+      e_key.addr = ip->dst_address;
+      e_key.port = udp->dst_port;
+      e_key.protocol = proto;
+      e_key.fib_index = rx_fib_index;
+      /* Non-zero return: no mapping matched, leave the packet as is. */
+      if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0))
+        return;
+
+      /* The "user" is keyed by the chosen local address and FIB index. */
+      u_key.addr = l_key.addr;
+      u_key.fib_index = l_key.fib_index;
+      kv.key = u_key.as_u64;
+
+      /* Ever heard of the "user" = src ip4 address before? */
+      if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+        {
+          /* no, make a new one */
+          pool_get (tsm->users, u);
+          memset (u, 0, sizeof (*u));
+          u->addr = l_key.addr;
+          u->fib_index = l_key.fib_index;
+
+          pool_get (tsm->list_pool, head);
+          u->sessions_per_user_list_head_index = head - tsm->list_pool;
+
+          clib_dlist_init (tsm->list_pool,
+                           u->sessions_per_user_list_head_index);
+
+          kv.value = u - tsm->users;
+
+          /* add user */
+          if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1))
+            clib_warning ("user key add failed");
+        }
+      else
+        {
+          u = pool_elt_at_index (tsm->users, value.value);
+        }
+
+      /* Create a new session */
+      pool_get (tsm->sessions, s);
+      memset (s, 0, sizeof (*s));
+
+      /* Flag the session as static-mapping and load-balancing; it is
+       * counted in the user's static session counter. */
+      s->ext_host_addr.as_u32 = ip->src_address.as_u32;
+      s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+      s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
+      s->outside_address_index = ~0;
+      s->out2in = e_key;
+      s->in2out = l_key;
+      u->nstaticsessions++;
+
+      /* Create list elts */
+      pool_get (tsm->list_pool, elt);
+      clib_dlist_init (tsm->list_pool, elt - tsm->list_pool);
+      elt->value = s - tsm->sessions;
+      s->per_user_index = elt - tsm->list_pool;
+      s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+      clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index,
+                          s->per_user_index);
+
+      /* Add to lookup tables */
+      /* s_kv still holds the outside key built above. */
+      s_kv.value = s - tsm->sessions;
+      if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1))
+        clib_warning ("out2in-ed key add failed");
+
+      /* Reuse key for the inside view: only local address, FIB index and
+       * local port change; r_addr and proto are kept. */
+      key.l_addr = l_key.addr;
+      key.fib_index = l_key.fib_index;
+      key.l_port = l_key.port;
+      s_kv.key[0] = key.as_u64[0];
+      s_kv.key[1] = key.as_u64[1];
+      if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1))
+        clib_warning ("in2out-ed key add failed");
+    }
+
+  /* Rewrite the destination address to the session's local address. */
+  new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32;
+
+  /* Update IP checksum */
+  sum = ip->checksum;
+  sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+  ip->checksum = ip_csum_fold (sum);
+
+  if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP))
+    {
+      old_port = tcp->dst_port;
+      tcp->dst_port = s->in2out.port;
+      new_port = tcp->dst_port;
+
+      /* Incrementally update the TCP checksum for the address and port
+       * change.  NOTE(review): the ip4_header_t/length arguments for the
+       * port update presumably only select a 16-bit field width --
+       * confirm against ip_csum_update. */
+      sum = tcp->checksum;
+      sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+      sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length);
+      tcp->checksum = ip_csum_fold(sum);
+    }
+  else
+    {
+      /* Every non-TCP protocol takes this branch: the port is rewritten
+       * and the UDP checksum field is set to 0 rather than recomputed. */
+      udp->dst_port = s->in2out.port;
+      udp->checksum = 0;
+    }
+
+  /* The TX slot is loaded with the session's inside FIB index. */
+  vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index;
+
+  /* Accounting */
+  s->last_heard = now;
+  s->total_pkts++;
+  s->total_bytes += vlib_buffer_length_in_chain (vm, b);
+}
+
 static uword
 snat_out2in_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
@@ -894,8 +1041,20 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value0.value);
+            {
+              if (PREDICT_FALSE (value0.value == ~0ULL))
+                {
+                  snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now,
+                                 vm);
+                  goto trace0;
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value0.value);
+                }
+            }
 
           old_addr0 = ip0->dst_address.as_u32;
           ip0->dst_address = s0->in2out.addr;
@@ -1033,8 +1192,20 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value1.value);
+            {
+              if (PREDICT_FALSE (value1.value == ~0ULL))
+                {
+                  snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now,
+                                 vm);
+                  goto trace1;
+                }
+              else
+                {
+                  s1 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value1.value);
+                }
+            }
 
           old_addr1 = ip1->dst_address.as_u32;
           ip1->dst_address = s1->in2out.addr;
@@ -1209,8 +1380,20 @@ snat_out2in_node_fn (vlib_main_t * vm,
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
-                                    value0.value);
+            {
+              if (PREDICT_FALSE (value0.value == ~0ULL))
+                {
+                  snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now,
+                                 vm);
+                  goto trace00;
+                }
+              else
+                {
+                  s0 = pool_elt_at_index (
+                    sm->per_thread_data[thread_index].sessions,
+                    value0.value);
+                }
+            }
 
           old_addr0 = ip0->dst_address.as_u32;
           ip0->dst_address = s0->in2out.addr;
index 0d622b0..de07019 100644 (file)
@@ -15,6 +15,7 @@ from scapy.packet import bind_layers
 from util import ppp
 from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder
 from time import sleep
+from util import ip4_range
 
 
 class MethodHolder(VppTestCase):
@@ -633,6 +634,15 @@ class TestNAT44(MethodHolder):
                 protocol=sm.protocol,
                 is_add=0)
 
+        lb_static_mappings = self.vapi.nat44_lb_static_mapping_dump()
+        for lb_sm in lb_static_mappings:
+            self.vapi.nat44_add_del_lb_static_mapping(
+                lb_sm.external_addr,
+                lb_sm.external_port,
+                lb_sm.protocol,
+                lb_sm.vrf_id,
+                is_add=0)
+
         adresses = self.vapi.nat44_address_dump()
         for addr in adresses:
             self.vapi.nat44_add_del_address_range(addr.ip_address,
@@ -1037,6 +1047,97 @@ class TestNAT44(MethodHolder):
         self.pg_start()
         self.pg3.assert_nothing_captured()
 
+    def test_static_lb(self):
+        """ NAT44 local service load balancing """
+        # One external TCP endpoint (nat_addr:80) is mapped onto two local
+        # servers listening on port 8080, weighted 70/30.
+        external_addr_n = socket.inet_pton(socket.AF_INET, self.nat_addr)
+        external_port = 80
+        local_port = 8080
+        server1 = self.pg0.remote_hosts[0]
+        server2 = self.pg0.remote_hosts[1]
+
+        locals = [{'addr': server1.ip4n,
+                   'port': local_port,
+                   'probability': 70},
+                  {'addr': server2.ip4n,
+                   'port': local_port,
+                   'probability': 30}]
+
+        # Configure the NAT address, the load-balancing mapping, and mark
+        # pg0 as inside and pg1 as outside.
+        self.nat44_add_address(self.nat_addr)
+        self.vapi.nat44_add_del_lb_static_mapping(external_addr_n,
+                                                  external_port,
+                                                  IP_PROTOS.tcp,
+                                                  local_num=len(locals),
+                                                  locals=locals)
+        self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index)
+        self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index,
+                                                  is_inside=0)
+
+        # from client to service
+        p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) /
+             IP(src=self.pg1.remote_ip4, dst=self.nat_addr) /
+             TCP(sport=12345, dport=external_port))
+        self.pg1.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        capture = self.pg0.get_capture(1)
+        p = capture[0]
+        # Remember which server was selected so the reply below is sent
+        # from that same server.
+        server = None
+        try:
+            ip = p[IP]
+            tcp = p[TCP]
+            self.assertIn(ip.dst, [server1.ip4, server2.ip4])
+            if ip.dst == server1.ip4:
+                server = server1
+            else:
+                server = server2
+            self.assertEqual(tcp.dport, local_port)
+            self.check_tcp_checksum(p)
+            self.check_ip_checksum(p)
+        except:
+            # Log the offending packet, then re-raise the original failure.
+            self.logger.error(ppp("Unexpected or invalid packet:", p))
+            raise
+
+        # from service back to client
+        p = (Ether(src=server.mac, dst=self.pg0.local_mac) /
+             IP(src=server.ip4, dst=self.pg1.remote_ip4) /
+             TCP(sport=local_port, dport=12345))
+        self.pg0.add_stream(p)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        capture = self.pg1.get_capture(1)
+        p = capture[0]
+        try:
+            # The reply must appear to come from the external endpoint.
+            ip = p[IP]
+            tcp = p[TCP]
+            self.assertEqual(ip.src, self.nat_addr)
+            self.assertEqual(tcp.sport, external_port)
+            self.check_tcp_checksum(p)
+            self.check_ip_checksum(p)
+        except:
+            # Log the offending packet, then re-raise the original failure.
+            self.logger.error(ppp("Unexpected or invalid packet:", p))
+            raise
+
+        # multiple clients
+        # Send one packet per client address and count how many land on
+        # each server.
+        server1_n = 0
+        server2_n = 0
+        clients = ip4_range(self.pg1.remote_ip4, 10, 20)
+        pkts = []
+        for client in clients:
+            p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) /
+                 IP(src=client, dst=self.nat_addr) /
+                 TCP(sport=12345, dport=external_port))
+            pkts.append(p)
+        self.pg1.add_stream(pkts)
+        self.pg_enable_capture(self.pg_interfaces)
+        self.pg_start()
+        capture = self.pg0.get_capture(len(pkts))
+        for p in capture:
+            if p[IP].dst == server1.ip4:
+                server1_n += 1
+            else:
+                server2_n += 1
+        # NOTE(review): the selection is weighted, so this comparison
+        # presumably holds only statistically for a small client set --
+        # confirm the test is not flaky.
+        self.assertTrue(server1_n > server2_n)
+
     def test_multiple_inside_interfaces(self):
         """ NAT44 multiple non-overlapping address space inside interfaces """
 
index 61db4d6..03238b9 100644 (file)
@@ -1238,6 +1238,36 @@ class VppPapiProvider(object):
         """
         return self.api(self.papi.nat44_user_dump, {})
 
+    def nat44_add_del_lb_static_mapping(
+            self,
+            external_addr,
+            external_port,
+            protocol,
+            vrf_id=0,
+            local_num=0,
+            locals=None,
+            is_add=1):
+        """Add/delete NAT44 load balancing static mapping
+
+        :param is_add - 1 if add, 0 if delete
+        """
+        return self.api(
+            self.papi.nat44_add_del_lb_static_mapping,
+            {'is_add': is_add,
+             'external_addr': external_addr,
+             'external_port': external_port,
+             'protocol': protocol,
+             'vrf_id': vrf_id,
+             'local_num': local_num,
+             'locals': locals})
+
+    def nat44_lb_static_mapping_dump(self):
+        """Dump NAT44 load balancing static mappings
+
+        :return: Dictionary of NAT44 load balancing static mapping
+        """
+        return self.api(self.papi.nat44_lb_static_mapping_dump, {})
+
     def nat_det_add_del_map(
             self,
             in_addr,